1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "intel_workarounds.h" 39 #include "i915_gemfs.h" 40 #include <linux/dma-fence-array.h> 41 #include <linux/kthread.h> 42 #include <linux/reservation.h> 43 #include <linux/shmem_fs.h> 44 #include <linux/slab.h> 45 #include <linux/stop_machine.h> 46 #include <linux/swap.h> 47 #include <linux/pci.h> 48 #include <linux/dma-buf.h> 49 50 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 51 52 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 53 { 54 if (obj->cache_dirty) 55 return false; 56 57 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 58 return true; 59 60 return obj->pin_global; /* currently in use by HW, keep flushed */ 61 } 62 63 static int 64 insert_mappable_node(struct i915_ggtt *ggtt, 65 struct drm_mm_node *node, u32 size) 66 { 67 memset(node, 0, sizeof(*node)); 68 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 69 size, 0, I915_COLOR_UNEVICTABLE, 70 0, ggtt->mappable_end, 71 DRM_MM_INSERT_LOW); 72 } 73 74 static void 75 remove_mappable_node(struct drm_mm_node *node) 76 { 77 drm_mm_remove_node(node); 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 u64 size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 u64 size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 might_sleep(); 105 106 /* 107 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 108 * userspace. If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 
110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 static u32 __i915_gem_park(struct drm_i915_private *i915) 141 { 142 GEM_TRACE("\n"); 143 144 lockdep_assert_held(&i915->drm.struct_mutex); 145 GEM_BUG_ON(i915->gt.active_requests); 146 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 147 148 if (!i915->gt.awake) 149 return I915_EPOCH_INVALID; 150 151 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 152 153 /* 154 * Be paranoid and flush a concurrent interrupt to make sure 155 * we don't reactivate any irq tasklets after parking. 156 * 157 * FIXME: Note that even though we have waited for execlists to be idle, 158 * there may still be an in-flight interrupt even though the CSB 159 * is now empty. synchronize_irq() makes sure that a residual interrupt 160 * is completed before we continue, but it doesn't prevent the HW from 161 * raising a spurious interrupt later. To complete the shield we should 162 * coordinate disabling the CS irq with flushing the interrupts. 163 */ 164 synchronize_irq(i915->drm.irq); 165 166 intel_engines_park(i915); 167 i915_timelines_park(i915); 168 169 i915_pmu_gt_parked(i915); 170 i915_vma_parked(i915); 171 172 i915->gt.awake = false; 173 174 if (INTEL_GEN(i915) >= 6) 175 gen6_rps_idle(i915); 176 177 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); 178 179 intel_runtime_pm_put(i915); 180 181 return i915->gt.epoch; 182 } 183 184 void i915_gem_park(struct drm_i915_private *i915) 185 { 186 GEM_TRACE("\n"); 187 188 lockdep_assert_held(&i915->drm.struct_mutex); 189 GEM_BUG_ON(i915->gt.active_requests); 190 191 if (!i915->gt.awake) 192 return; 193 194 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 195 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 196 } 197 198 void i915_gem_unpark(struct drm_i915_private *i915) 199 { 200 GEM_TRACE("\n"); 201 202 lockdep_assert_held(&i915->drm.struct_mutex); 203 GEM_BUG_ON(!i915->gt.active_requests); 204 205 if (i915->gt.awake) 206 return; 207 208 intel_runtime_pm_get_noresume(i915); 209 210 /* 211 * It seems that the DMC likes to transition between the DC states a lot 212 * when there are no connected displays (no active power domains) during 213 * command submission. 214 * 215 * This activity has negative impact on the performance of the chip with 216 * huge latencies observed in the interrupt handler and elsewhere. 217 * 218 * Work around it by grabbing a GT IRQ power domain whilst there is any 219 * GT activity, preventing any DC state transitions. 
220 */ 221 intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 222 223 i915->gt.awake = true; 224 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 225 i915->gt.epoch = 1; 226 227 intel_enable_gt_powersave(i915); 228 i915_update_gfx_val(i915); 229 if (INTEL_GEN(i915) >= 6) 230 gen6_rps_busy(i915); 231 i915_pmu_gt_unparked(i915); 232 233 intel_engines_unpark(i915); 234 235 i915_queue_hangcheck(i915); 236 237 queue_delayed_work(i915->wq, 238 &i915->gt.retire_work, 239 round_jiffies_up_relative(HZ)); 240 } 241 242 int 243 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 244 struct drm_file *file) 245 { 246 struct drm_i915_private *dev_priv = to_i915(dev); 247 struct i915_ggtt *ggtt = &dev_priv->ggtt; 248 struct drm_i915_gem_get_aperture *args = data; 249 struct i915_vma *vma; 250 u64 pinned; 251 252 pinned = ggtt->vm.reserved; 253 mutex_lock(&dev->struct_mutex); 254 list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) 255 if (i915_vma_is_pinned(vma)) 256 pinned += vma->node.size; 257 list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) 258 if (i915_vma_is_pinned(vma)) 259 pinned += vma->node.size; 260 mutex_unlock(&dev->struct_mutex); 261 262 args->aper_size = ggtt->vm.total; 263 args->aper_available_size = args->aper_size - pinned; 264 265 return 0; 266 } 267 268 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 269 { 270 struct address_space *mapping = obj->base.filp->f_mapping; 271 drm_dma_handle_t *phys; 272 struct sg_table *st; 273 struct scatterlist *sg; 274 char *vaddr; 275 int i; 276 int err; 277 278 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 279 return -EINVAL; 280 281 /* Always aligning to the object size, allows a single allocation 282 * to handle all possible callers, and given typical object sizes, 283 * the alignment of the buddy allocation will naturally match. 
284 */ 285 phys = drm_pci_alloc(obj->base.dev, 286 roundup_pow_of_two(obj->base.size), 287 roundup_pow_of_two(obj->base.size)); 288 if (!phys) 289 return -ENOMEM; 290 291 vaddr = phys->vaddr; 292 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 293 struct page *page; 294 char *src; 295 296 page = shmem_read_mapping_page(mapping, i); 297 if (IS_ERR(page)) { 298 err = PTR_ERR(page); 299 goto err_phys; 300 } 301 302 src = kmap_atomic(page); 303 memcpy(vaddr, src, PAGE_SIZE); 304 drm_clflush_virt_range(vaddr, PAGE_SIZE); 305 kunmap_atomic(src); 306 307 put_page(page); 308 vaddr += PAGE_SIZE; 309 } 310 311 i915_gem_chipset_flush(to_i915(obj->base.dev)); 312 313 st = kmalloc(sizeof(*st), GFP_KERNEL); 314 if (!st) { 315 err = -ENOMEM; 316 goto err_phys; 317 } 318 319 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 320 kfree(st); 321 err = -ENOMEM; 322 goto err_phys; 323 } 324 325 sg = st->sgl; 326 sg->offset = 0; 327 sg->length = obj->base.size; 328 329 sg_dma_address(sg) = phys->busaddr; 330 sg_dma_len(sg) = obj->base.size; 331 332 obj->phys_handle = phys; 333 334 __i915_gem_object_set_pages(obj, st, sg->length); 335 336 return 0; 337 338 err_phys: 339 drm_pci_free(obj->base.dev, phys); 340 341 return err; 342 } 343 344 static void __start_cpu_write(struct drm_i915_gem_object *obj) 345 { 346 obj->read_domains = I915_GEM_DOMAIN_CPU; 347 obj->write_domain = I915_GEM_DOMAIN_CPU; 348 if (cpu_write_needs_clflush(obj)) 349 obj->cache_dirty = true; 350 } 351 352 static void 353 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 354 struct sg_table *pages, 355 bool needs_clflush) 356 { 357 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 358 359 if (obj->mm.madv == I915_MADV_DONTNEED) 360 obj->mm.dirty = false; 361 362 if (needs_clflush && 363 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 364 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 365 drm_clflush_sg(pages); 366 367 __start_cpu_write(obj); 368 } 369 370 static void 371 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 372 struct sg_table *pages) 373 { 374 __i915_gem_object_release_shmem(obj, pages, false); 375 376 if (obj->mm.dirty) { 377 struct address_space *mapping = obj->base.filp->f_mapping; 378 char *vaddr = obj->phys_handle->vaddr; 379 int i; 380 381 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 382 struct page *page; 383 char *dst; 384 385 page = shmem_read_mapping_page(mapping, i); 386 if (IS_ERR(page)) 387 continue; 388 389 dst = kmap_atomic(page); 390 drm_clflush_virt_range(vaddr, PAGE_SIZE); 391 memcpy(dst, vaddr, PAGE_SIZE); 392 kunmap_atomic(dst); 393 394 set_page_dirty(page); 395 if (obj->mm.madv == I915_MADV_WILLNEED) 396 mark_page_accessed(page); 397 put_page(page); 398 vaddr += PAGE_SIZE; 399 } 400 obj->mm.dirty = false; 401 } 402 403 sg_free_table(pages); 404 kfree(pages); 405 406 drm_pci_free(obj->base.dev, obj->phys_handle); 407 } 408 409 static void 410 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 411 { 412 i915_gem_object_unpin_pages(obj); 413 } 414 415 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 416 .get_pages = i915_gem_object_get_pages_phys, 417 .put_pages = i915_gem_object_put_pages_phys, 418 .release = i915_gem_object_release_phys, 419 }; 420 421 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 422 423 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 424 { 425 struct i915_vma *vma; 426 LIST_HEAD(still_in_list); 427 int ret; 428 429 lockdep_assert_held(&obj->base.dev->struct_mutex); 430 431 /* Closed vma are 
removed from the obj->vma_list - but they may 432 * still have an active binding on the object. To remove those we 433 * must wait for all rendering to complete to the object (as unbinding 434 * must anyway), and retire the requests. 435 */ 436 ret = i915_gem_object_set_to_cpu_domain(obj, false); 437 if (ret) 438 return ret; 439 440 while ((vma = list_first_entry_or_null(&obj->vma_list, 441 struct i915_vma, 442 obj_link))) { 443 list_move_tail(&vma->obj_link, &still_in_list); 444 ret = i915_vma_unbind(vma); 445 if (ret) 446 break; 447 } 448 list_splice(&still_in_list, &obj->vma_list); 449 450 return ret; 451 } 452 453 static long 454 i915_gem_object_wait_fence(struct dma_fence *fence, 455 unsigned int flags, 456 long timeout, 457 struct intel_rps_client *rps_client) 458 { 459 struct i915_request *rq; 460 461 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 462 463 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 464 return timeout; 465 466 if (!dma_fence_is_i915(fence)) 467 return dma_fence_wait_timeout(fence, 468 flags & I915_WAIT_INTERRUPTIBLE, 469 timeout); 470 471 rq = to_request(fence); 472 if (i915_request_completed(rq)) 473 goto out; 474 475 /* 476 * This client is about to stall waiting for the GPU. In many cases 477 * this is undesirable and limits the throughput of the system, as 478 * many clients cannot continue processing user input/output whilst 479 * blocked. RPS autotuning may take tens of milliseconds to respond 480 * to the GPU load and thus incurs additional latency for the client. 481 * We can circumvent that by promoting the GPU frequency to maximum 482 * before we wait. This makes the GPU throttle up much more quickly 483 * (good for benchmarks and user experience, e.g. window animations), 484 * but at a cost of spending more power processing the workload 485 * (bad for battery). Not all clients even want their results 486 * immediately and for them we should just let the GPU select its own 487 * frequency to maximise efficiency. To prevent a single client from 488 * forcing the clocks too high for the whole system, we only allow 489 * each client to waitboost once in a busy period. 490 */ 491 if (rps_client && !i915_request_started(rq)) { 492 if (INTEL_GEN(rq->i915) >= 6) 493 gen6_rps_boost(rq, rps_client); 494 } 495 496 timeout = i915_request_wait(rq, flags, timeout); 497 498 out: 499 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 500 i915_request_retire_upto(rq); 501 502 return timeout; 503 } 504 505 static long 506 i915_gem_object_wait_reservation(struct reservation_object *resv, 507 unsigned int flags, 508 long timeout, 509 struct intel_rps_client *rps_client) 510 { 511 unsigned int seq = __read_seqcount_begin(&resv->seq); 512 struct dma_fence *excl; 513 bool prune_fences = false; 514 515 if (flags & I915_WAIT_ALL) { 516 struct dma_fence **shared; 517 unsigned int count, i; 518 int ret; 519 520 ret = reservation_object_get_fences_rcu(resv, 521 &excl, &count, &shared); 522 if (ret) 523 return ret; 524 525 for (i = 0; i < count; i++) { 526 timeout = i915_gem_object_wait_fence(shared[i], 527 flags, timeout, 528 rps_client); 529 if (timeout < 0) 530 break; 531 532 dma_fence_put(shared[i]); 533 } 534 535 for (; i < count; i++) 536 dma_fence_put(shared[i]); 537 kfree(shared); 538 539 /* 540 * If both shared fences and an exclusive fence exist, 541 * then by construction the shared fences must be later 542 * than the exclusive fence. If we successfully wait for 543 * all the shared fences, we know that the exclusive fence 544 * must all be signaled. 
If all the shared fences are 545 * signaled, we can prune the array and recover the 546 * floating references on the fences/requests. 547 */ 548 prune_fences = count && timeout >= 0; 549 } else { 550 excl = reservation_object_get_excl_rcu(resv); 551 } 552 553 if (excl && timeout >= 0) 554 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 555 rps_client); 556 557 dma_fence_put(excl); 558 559 /* 560 * Opportunistically prune the fences iff we know they have *all* been 561 * signaled and that the reservation object has not been changed (i.e. 562 * no new fences have been added). 563 */ 564 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 565 if (reservation_object_trylock(resv)) { 566 if (!__read_seqcount_retry(&resv->seq, seq)) 567 reservation_object_add_excl_fence(resv, NULL); 568 reservation_object_unlock(resv); 569 } 570 } 571 572 return timeout; 573 } 574 575 static void __fence_set_priority(struct dma_fence *fence, 576 const struct i915_sched_attr *attr) 577 { 578 struct i915_request *rq; 579 struct intel_engine_cs *engine; 580 581 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 582 return; 583 584 rq = to_request(fence); 585 engine = rq->engine; 586 587 local_bh_disable(); 588 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 589 if (engine->schedule) 590 engine->schedule(rq, attr); 591 rcu_read_unlock(); 592 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 593 } 594 595 static void fence_set_priority(struct dma_fence *fence, 596 const struct i915_sched_attr *attr) 597 { 598 /* Recurse once into a fence-array */ 599 if (dma_fence_is_array(fence)) { 600 struct dma_fence_array *array = to_dma_fence_array(fence); 601 int i; 602 603 for (i = 0; i < array->num_fences; i++) 604 __fence_set_priority(array->fences[i], attr); 605 } else { 606 __fence_set_priority(fence, attr); 607 } 608 } 609 610 int 611 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 612 unsigned int flags, 613 const struct i915_sched_attr *attr) 614 { 615 struct dma_fence *excl; 616 617 if (flags & I915_WAIT_ALL) { 618 struct dma_fence **shared; 619 unsigned int count, i; 620 int ret; 621 622 ret = reservation_object_get_fences_rcu(obj->resv, 623 &excl, &count, &shared); 624 if (ret) 625 return ret; 626 627 for (i = 0; i < count; i++) { 628 fence_set_priority(shared[i], attr); 629 dma_fence_put(shared[i]); 630 } 631 632 kfree(shared); 633 } else { 634 excl = reservation_object_get_excl_rcu(obj->resv); 635 } 636 637 if (excl) { 638 fence_set_priority(excl, attr); 639 dma_fence_put(excl); 640 } 641 return 0; 642 } 643 644 /** 645 * Waits for rendering to the object to be completed 646 * @obj: i915 gem object 647 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 648 * @timeout: how long to wait 649 * @rps_client: client (user process) to charge for any waitboosting 650 */ 651 int 652 i915_gem_object_wait(struct drm_i915_gem_object *obj, 653 unsigned int flags, 654 long timeout, 655 struct intel_rps_client *rps_client) 656 { 657 might_sleep(); 658 #if IS_ENABLED(CONFIG_LOCKDEP) 659 GEM_BUG_ON(debug_locks && 660 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 661 !!(flags & I915_WAIT_LOCKED)); 662 #endif 663 GEM_BUG_ON(timeout < 0); 664 665 timeout = i915_gem_object_wait_reservation(obj->resv, 666 flags, timeout, 667 rps_client); 668 return timeout < 0 ? 
timeout : 0; 669 } 670 671 static struct intel_rps_client *to_rps_client(struct drm_file *file) 672 { 673 struct drm_i915_file_private *fpriv = file->driver_priv; 674 675 return &fpriv->rps_client; 676 } 677 678 static int 679 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 680 struct drm_i915_gem_pwrite *args, 681 struct drm_file *file) 682 { 683 void *vaddr = obj->phys_handle->vaddr + args->offset; 684 char __user *user_data = u64_to_user_ptr(args->data_ptr); 685 686 /* We manually control the domain here and pretend that it 687 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 688 */ 689 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 690 if (copy_from_user(vaddr, user_data, args->size)) 691 return -EFAULT; 692 693 drm_clflush_virt_range(vaddr, args->size); 694 i915_gem_chipset_flush(to_i915(obj->base.dev)); 695 696 intel_fb_obj_flush(obj, ORIGIN_CPU); 697 return 0; 698 } 699 700 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 701 { 702 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 703 } 704 705 void i915_gem_object_free(struct drm_i915_gem_object *obj) 706 { 707 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 708 kmem_cache_free(dev_priv->objects, obj); 709 } 710 711 static int 712 i915_gem_create(struct drm_file *file, 713 struct drm_i915_private *dev_priv, 714 uint64_t size, 715 uint32_t *handle_p) 716 { 717 struct drm_i915_gem_object *obj; 718 int ret; 719 u32 handle; 720 721 size = roundup(size, PAGE_SIZE); 722 if (size == 0) 723 return -EINVAL; 724 725 /* Allocate the new object */ 726 obj = i915_gem_object_create(dev_priv, size); 727 if (IS_ERR(obj)) 728 return PTR_ERR(obj); 729 730 ret = drm_gem_handle_create(file, &obj->base, &handle); 731 /* drop reference from allocate - handle holds it now */ 732 i915_gem_object_put(obj); 733 if (ret) 734 return ret; 735 736 *handle_p = handle; 737 return 0; 738 } 739 740 int 741 i915_gem_dumb_create(struct drm_file *file, 742 struct drm_device *dev, 743 struct drm_mode_create_dumb *args) 744 { 745 /* have to work out size/pitch and return them */ 746 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 747 args->size = args->pitch * args->height; 748 return i915_gem_create(file, to_i915(dev), 749 args->size, &args->handle); 750 } 751 752 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 753 { 754 return !(obj->cache_level == I915_CACHE_NONE || 755 obj->cache_level == I915_CACHE_WT); 756 } 757 758 /** 759 * Creates a new mm object and returns a handle to it. 760 * @dev: drm device pointer 761 * @data: ioctl data blob 762 * @file: drm file pointer 763 */ 764 int 765 i915_gem_create_ioctl(struct drm_device *dev, void *data, 766 struct drm_file *file) 767 { 768 struct drm_i915_private *dev_priv = to_i915(dev); 769 struct drm_i915_gem_create *args = data; 770 771 i915_gem_flush_free_objects(dev_priv); 772 773 return i915_gem_create(file, dev_priv, 774 args->size, &args->handle); 775 } 776 777 static inline enum fb_op_origin 778 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 779 { 780 return (domain == I915_GEM_DOMAIN_GTT ? 781 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 782 } 783 784 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 785 { 786 /* 787 * No actual flushing is required for the GTT write domain for reads 788 * from the GTT domain. Writes to it "immediately" go to main memory 789 * as far as we know, so there's no chipset flush. It also doesn't 790 * land in the GPU render cache. 
791 * 792 * However, we do have to enforce the order so that all writes through 793 * the GTT land before any writes to the device, such as updates to 794 * the GATT itself. 795 * 796 * We also have to wait a bit for the writes to land from the GTT. 797 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 798 * timing. This issue has only been observed when switching quickly 799 * between GTT writes and CPU reads from inside the kernel on recent hw, 800 * and it appears to only affect discrete GTT blocks (i.e. on LLC 801 * system agents we cannot reproduce this behaviour, until Cannonlake 802 * that was!). 803 */ 804 805 wmb(); 806 807 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 808 return; 809 810 i915_gem_chipset_flush(dev_priv); 811 812 intel_runtime_pm_get(dev_priv); 813 spin_lock_irq(&dev_priv->uncore.lock); 814 815 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 816 817 spin_unlock_irq(&dev_priv->uncore.lock); 818 intel_runtime_pm_put(dev_priv); 819 } 820 821 static void 822 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 823 { 824 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 825 struct i915_vma *vma; 826 827 if (!(obj->write_domain & flush_domains)) 828 return; 829 830 switch (obj->write_domain) { 831 case I915_GEM_DOMAIN_GTT: 832 i915_gem_flush_ggtt_writes(dev_priv); 833 834 intel_fb_obj_flush(obj, 835 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 836 837 for_each_ggtt_vma(vma, obj) { 838 if (vma->iomap) 839 continue; 840 841 i915_vma_unset_ggtt_write(vma); 842 } 843 break; 844 845 case I915_GEM_DOMAIN_WC: 846 wmb(); 847 break; 848 849 case I915_GEM_DOMAIN_CPU: 850 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 851 break; 852 853 case I915_GEM_DOMAIN_RENDER: 854 if (gpu_write_needs_clflush(obj)) 855 obj->cache_dirty = true; 856 break; 857 } 858 859 obj->write_domain = 0; 860 } 861 862 static inline int 863 __copy_to_user_swizzled(char __user *cpu_vaddr, 864 const char *gpu_vaddr, int gpu_offset, 865 int length) 866 { 867 int ret, cpu_offset = 0; 868 869 while (length > 0) { 870 int cacheline_end = ALIGN(gpu_offset + 1, 64); 871 int this_length = min(cacheline_end - gpu_offset, length); 872 int swizzled_gpu_offset = gpu_offset ^ 64; 873 874 ret = __copy_to_user(cpu_vaddr + cpu_offset, 875 gpu_vaddr + swizzled_gpu_offset, 876 this_length); 877 if (ret) 878 return ret + length; 879 880 cpu_offset += this_length; 881 gpu_offset += this_length; 882 length -= this_length; 883 } 884 885 return 0; 886 } 887 888 static inline int 889 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 890 const char __user *cpu_vaddr, 891 int length) 892 { 893 int ret, cpu_offset = 0; 894 895 while (length > 0) { 896 int cacheline_end = ALIGN(gpu_offset + 1, 64); 897 int this_length = min(cacheline_end - gpu_offset, length); 898 int swizzled_gpu_offset = gpu_offset ^ 64; 899 900 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 901 cpu_vaddr + cpu_offset, 902 this_length); 903 if (ret) 904 return ret + length; 905 906 cpu_offset += this_length; 907 gpu_offset += this_length; 908 length -= this_length; 909 } 910 911 return 0; 912 } 913 914 /* 915 * Pins the specified object's pages and synchronizes the object with 916 * GPU accesses. Sets needs_clflush to non-zero if the caller should 917 * flush the object from the CPU cache. 
918 */ 919 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 920 unsigned int *needs_clflush) 921 { 922 int ret; 923 924 lockdep_assert_held(&obj->base.dev->struct_mutex); 925 926 *needs_clflush = 0; 927 if (!i915_gem_object_has_struct_page(obj)) 928 return -ENODEV; 929 930 ret = i915_gem_object_wait(obj, 931 I915_WAIT_INTERRUPTIBLE | 932 I915_WAIT_LOCKED, 933 MAX_SCHEDULE_TIMEOUT, 934 NULL); 935 if (ret) 936 return ret; 937 938 ret = i915_gem_object_pin_pages(obj); 939 if (ret) 940 return ret; 941 942 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 943 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 944 ret = i915_gem_object_set_to_cpu_domain(obj, false); 945 if (ret) 946 goto err_unpin; 947 else 948 goto out; 949 } 950 951 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 952 953 /* If we're not in the cpu read domain, set ourself into the gtt 954 * read domain and manually flush cachelines (if required). This 955 * optimizes for the case when the gpu will dirty the data 956 * anyway again before the next pread happens. 957 */ 958 if (!obj->cache_dirty && 959 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 960 *needs_clflush = CLFLUSH_BEFORE; 961 962 out: 963 /* return with the pages pinned */ 964 return 0; 965 966 err_unpin: 967 i915_gem_object_unpin_pages(obj); 968 return ret; 969 } 970 971 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 972 unsigned int *needs_clflush) 973 { 974 int ret; 975 976 lockdep_assert_held(&obj->base.dev->struct_mutex); 977 978 *needs_clflush = 0; 979 if (!i915_gem_object_has_struct_page(obj)) 980 return -ENODEV; 981 982 ret = i915_gem_object_wait(obj, 983 I915_WAIT_INTERRUPTIBLE | 984 I915_WAIT_LOCKED | 985 I915_WAIT_ALL, 986 MAX_SCHEDULE_TIMEOUT, 987 NULL); 988 if (ret) 989 return ret; 990 991 ret = i915_gem_object_pin_pages(obj); 992 if (ret) 993 return ret; 994 995 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 996 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 997 ret = i915_gem_object_set_to_cpu_domain(obj, true); 998 if (ret) 999 goto err_unpin; 1000 else 1001 goto out; 1002 } 1003 1004 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 1005 1006 /* If we're not in the cpu write domain, set ourself into the 1007 * gtt write domain and manually flush cachelines (as required). 1008 * This optimizes for the case when the gpu will use the data 1009 * right away and we therefore have to clflush anyway. 1010 */ 1011 if (!obj->cache_dirty) { 1012 *needs_clflush |= CLFLUSH_AFTER; 1013 1014 /* 1015 * Same trick applies to invalidate partially written 1016 * cachelines read before writing. 1017 */ 1018 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 1019 *needs_clflush |= CLFLUSH_BEFORE; 1020 } 1021 1022 out: 1023 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1024 obj->mm.dirty = true; 1025 /* return with the pages pinned */ 1026 return 0; 1027 1028 err_unpin: 1029 i915_gem_object_unpin_pages(obj); 1030 return ret; 1031 } 1032 1033 static void 1034 shmem_clflush_swizzled_range(char *addr, unsigned long length, 1035 bool swizzled) 1036 { 1037 if (unlikely(swizzled)) { 1038 unsigned long start = (unsigned long) addr; 1039 unsigned long end = (unsigned long) addr + length; 1040 1041 /* For swizzling simply ensure that we always flush both 1042 * channels. Lame, but simple and it works. Swizzled 1043 * pwrite/pread is far from a hotpath - current userspace 1044 * doesn't use it at all. 
*/ 1045 start = round_down(start, 128); 1046 end = round_up(end, 128); 1047 1048 drm_clflush_virt_range((void *)start, end - start); 1049 } else { 1050 drm_clflush_virt_range(addr, length); 1051 } 1052 1053 } 1054 1055 /* Only difference to the fast-path function is that this can handle bit17 1056 * and uses non-atomic copy and kmap functions. */ 1057 static int 1058 shmem_pread_slow(struct page *page, int offset, int length, 1059 char __user *user_data, 1060 bool page_do_bit17_swizzling, bool needs_clflush) 1061 { 1062 char *vaddr; 1063 int ret; 1064 1065 vaddr = kmap(page); 1066 if (needs_clflush) 1067 shmem_clflush_swizzled_range(vaddr + offset, length, 1068 page_do_bit17_swizzling); 1069 1070 if (page_do_bit17_swizzling) 1071 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 1072 else 1073 ret = __copy_to_user(user_data, vaddr + offset, length); 1074 kunmap(page); 1075 1076 return ret ? - EFAULT : 0; 1077 } 1078 1079 static int 1080 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 1081 bool page_do_bit17_swizzling, bool needs_clflush) 1082 { 1083 int ret; 1084 1085 ret = -ENODEV; 1086 if (!page_do_bit17_swizzling) { 1087 char *vaddr = kmap_atomic(page); 1088 1089 if (needs_clflush) 1090 drm_clflush_virt_range(vaddr + offset, length); 1091 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1092 kunmap_atomic(vaddr); 1093 } 1094 if (ret == 0) 1095 return 0; 1096 1097 return shmem_pread_slow(page, offset, length, user_data, 1098 page_do_bit17_swizzling, needs_clflush); 1099 } 1100 1101 static int 1102 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1103 struct drm_i915_gem_pread *args) 1104 { 1105 char __user *user_data; 1106 u64 remain; 1107 unsigned int obj_do_bit17_swizzling; 1108 unsigned int needs_clflush; 1109 unsigned int idx, offset; 1110 int ret; 1111 1112 obj_do_bit17_swizzling = 0; 1113 if (i915_gem_object_needs_bit17_swizzle(obj)) 1114 obj_do_bit17_swizzling = BIT(17); 1115 1116 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1117 if (ret) 1118 return ret; 1119 1120 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1121 mutex_unlock(&obj->base.dev->struct_mutex); 1122 if (ret) 1123 return ret; 1124 1125 remain = args->size; 1126 user_data = u64_to_user_ptr(args->data_ptr); 1127 offset = offset_in_page(args->offset); 1128 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1129 struct page *page = i915_gem_object_get_page(obj, idx); 1130 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1131 1132 ret = shmem_pread(page, offset, length, user_data, 1133 page_to_phys(page) & obj_do_bit17_swizzling, 1134 needs_clflush); 1135 if (ret) 1136 break; 1137 1138 remain -= length; 1139 user_data += length; 1140 offset = 0; 1141 } 1142 1143 i915_gem_obj_finish_shmem_access(obj); 1144 return ret; 1145 } 1146 1147 static inline bool 1148 gtt_user_read(struct io_mapping *mapping, 1149 loff_t base, int offset, 1150 char __user *user_data, int length) 1151 { 1152 void __iomem *vaddr; 1153 unsigned long unwritten; 1154 1155 /* We can use the cpu mem copy function because this is X86. 
*/ 1156 vaddr = io_mapping_map_atomic_wc(mapping, base); 1157 unwritten = __copy_to_user_inatomic(user_data, 1158 (void __force *)vaddr + offset, 1159 length); 1160 io_mapping_unmap_atomic(vaddr); 1161 if (unwritten) { 1162 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1163 unwritten = copy_to_user(user_data, 1164 (void __force *)vaddr + offset, 1165 length); 1166 io_mapping_unmap(vaddr); 1167 } 1168 return unwritten; 1169 } 1170 1171 static int 1172 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1173 const struct drm_i915_gem_pread *args) 1174 { 1175 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1176 struct i915_ggtt *ggtt = &i915->ggtt; 1177 struct drm_mm_node node; 1178 struct i915_vma *vma; 1179 void __user *user_data; 1180 u64 remain, offset; 1181 int ret; 1182 1183 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1184 if (ret) 1185 return ret; 1186 1187 intel_runtime_pm_get(i915); 1188 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1189 PIN_MAPPABLE | 1190 PIN_NONFAULT | 1191 PIN_NONBLOCK); 1192 if (!IS_ERR(vma)) { 1193 node.start = i915_ggtt_offset(vma); 1194 node.allocated = false; 1195 ret = i915_vma_put_fence(vma); 1196 if (ret) { 1197 i915_vma_unpin(vma); 1198 vma = ERR_PTR(ret); 1199 } 1200 } 1201 if (IS_ERR(vma)) { 1202 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1203 if (ret) 1204 goto out_unlock; 1205 GEM_BUG_ON(!node.allocated); 1206 } 1207 1208 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1209 if (ret) 1210 goto out_unpin; 1211 1212 mutex_unlock(&i915->drm.struct_mutex); 1213 1214 user_data = u64_to_user_ptr(args->data_ptr); 1215 remain = args->size; 1216 offset = args->offset; 1217 1218 while (remain > 0) { 1219 /* Operation in this page 1220 * 1221 * page_base = page offset within aperture 1222 * page_offset = offset within page 1223 * page_length = bytes to copy for this page 1224 */ 1225 u32 page_base = node.start; 1226 unsigned page_offset = offset_in_page(offset); 1227 unsigned page_length = PAGE_SIZE - page_offset; 1228 page_length = remain < page_length ? remain : page_length; 1229 if (node.allocated) { 1230 wmb(); 1231 ggtt->vm.insert_page(&ggtt->vm, 1232 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1233 node.start, I915_CACHE_NONE, 0); 1234 wmb(); 1235 } else { 1236 page_base += offset & PAGE_MASK; 1237 } 1238 1239 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1240 user_data, page_length)) { 1241 ret = -EFAULT; 1242 break; 1243 } 1244 1245 remain -= page_length; 1246 user_data += page_length; 1247 offset += page_length; 1248 } 1249 1250 mutex_lock(&i915->drm.struct_mutex); 1251 out_unpin: 1252 if (node.allocated) { 1253 wmb(); 1254 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1255 remove_mappable_node(&node); 1256 } else { 1257 i915_vma_unpin(vma); 1258 } 1259 out_unlock: 1260 intel_runtime_pm_put(i915); 1261 mutex_unlock(&i915->drm.struct_mutex); 1262 1263 return ret; 1264 } 1265 1266 /** 1267 * Reads data from the object referenced by handle. 1268 * @dev: drm device pointer 1269 * @data: ioctl data blob 1270 * @file: drm file pointer 1271 * 1272 * On error, the contents of *data are undefined. 
1273 */ 1274 int 1275 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1276 struct drm_file *file) 1277 { 1278 struct drm_i915_gem_pread *args = data; 1279 struct drm_i915_gem_object *obj; 1280 int ret; 1281 1282 if (args->size == 0) 1283 return 0; 1284 1285 if (!access_ok(VERIFY_WRITE, 1286 u64_to_user_ptr(args->data_ptr), 1287 args->size)) 1288 return -EFAULT; 1289 1290 obj = i915_gem_object_lookup(file, args->handle); 1291 if (!obj) 1292 return -ENOENT; 1293 1294 /* Bounds check source. */ 1295 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1296 ret = -EINVAL; 1297 goto out; 1298 } 1299 1300 trace_i915_gem_object_pread(obj, args->offset, args->size); 1301 1302 ret = i915_gem_object_wait(obj, 1303 I915_WAIT_INTERRUPTIBLE, 1304 MAX_SCHEDULE_TIMEOUT, 1305 to_rps_client(file)); 1306 if (ret) 1307 goto out; 1308 1309 ret = i915_gem_object_pin_pages(obj); 1310 if (ret) 1311 goto out; 1312 1313 ret = i915_gem_shmem_pread(obj, args); 1314 if (ret == -EFAULT || ret == -ENODEV) 1315 ret = i915_gem_gtt_pread(obj, args); 1316 1317 i915_gem_object_unpin_pages(obj); 1318 out: 1319 i915_gem_object_put(obj); 1320 return ret; 1321 } 1322 1323 /* This is the fast write path which cannot handle 1324 * page faults in the source data 1325 */ 1326 1327 static inline bool 1328 ggtt_write(struct io_mapping *mapping, 1329 loff_t base, int offset, 1330 char __user *user_data, int length) 1331 { 1332 void __iomem *vaddr; 1333 unsigned long unwritten; 1334 1335 /* We can use the cpu mem copy function because this is X86. */ 1336 vaddr = io_mapping_map_atomic_wc(mapping, base); 1337 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1338 user_data, length); 1339 io_mapping_unmap_atomic(vaddr); 1340 if (unwritten) { 1341 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1342 unwritten = copy_from_user((void __force *)vaddr + offset, 1343 user_data, length); 1344 io_mapping_unmap(vaddr); 1345 } 1346 1347 return unwritten; 1348 } 1349 1350 /** 1351 * This is the fast pwrite path, where we copy the data directly from the 1352 * user into the GTT, uncached. 1353 * @obj: i915 GEM object 1354 * @args: pwrite arguments structure 1355 */ 1356 static int 1357 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1358 const struct drm_i915_gem_pwrite *args) 1359 { 1360 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1361 struct i915_ggtt *ggtt = &i915->ggtt; 1362 struct drm_mm_node node; 1363 struct i915_vma *vma; 1364 u64 remain, offset; 1365 void __user *user_data; 1366 int ret; 1367 1368 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1369 if (ret) 1370 return ret; 1371 1372 if (i915_gem_object_has_struct_page(obj)) { 1373 /* 1374 * Avoid waking the device up if we can fallback, as 1375 * waking/resuming is very slow (worst-case 10-100 ms 1376 * depending on PCI sleeps and our own resume time). 1377 * This easily dwarfs any performance advantage from 1378 * using the cache bypass of indirect GGTT access. 
1379 */ 1380 if (!intel_runtime_pm_get_if_in_use(i915)) { 1381 ret = -EFAULT; 1382 goto out_unlock; 1383 } 1384 } else { 1385 /* No backing pages, no fallback, we must force GGTT access */ 1386 intel_runtime_pm_get(i915); 1387 } 1388 1389 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1390 PIN_MAPPABLE | 1391 PIN_NONFAULT | 1392 PIN_NONBLOCK); 1393 if (!IS_ERR(vma)) { 1394 node.start = i915_ggtt_offset(vma); 1395 node.allocated = false; 1396 ret = i915_vma_put_fence(vma); 1397 if (ret) { 1398 i915_vma_unpin(vma); 1399 vma = ERR_PTR(ret); 1400 } 1401 } 1402 if (IS_ERR(vma)) { 1403 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1404 if (ret) 1405 goto out_rpm; 1406 GEM_BUG_ON(!node.allocated); 1407 } 1408 1409 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1410 if (ret) 1411 goto out_unpin; 1412 1413 mutex_unlock(&i915->drm.struct_mutex); 1414 1415 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1416 1417 user_data = u64_to_user_ptr(args->data_ptr); 1418 offset = args->offset; 1419 remain = args->size; 1420 while (remain) { 1421 /* Operation in this page 1422 * 1423 * page_base = page offset within aperture 1424 * page_offset = offset within page 1425 * page_length = bytes to copy for this page 1426 */ 1427 u32 page_base = node.start; 1428 unsigned int page_offset = offset_in_page(offset); 1429 unsigned int page_length = PAGE_SIZE - page_offset; 1430 page_length = remain < page_length ? remain : page_length; 1431 if (node.allocated) { 1432 wmb(); /* flush the write before we modify the GGTT */ 1433 ggtt->vm.insert_page(&ggtt->vm, 1434 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1435 node.start, I915_CACHE_NONE, 0); 1436 wmb(); /* flush modifications to the GGTT (insert_page) */ 1437 } else { 1438 page_base += offset & PAGE_MASK; 1439 } 1440 /* If we get a fault while copying data, then (presumably) our 1441 * source page isn't available. Return the error and we'll 1442 * retry in the slow path. 1443 * If the object is non-shmem backed, we retry again with the 1444 * path that handles page fault. 1445 */ 1446 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1447 user_data, page_length)) { 1448 ret = -EFAULT; 1449 break; 1450 } 1451 1452 remain -= page_length; 1453 user_data += page_length; 1454 offset += page_length; 1455 } 1456 intel_fb_obj_flush(obj, ORIGIN_CPU); 1457 1458 mutex_lock(&i915->drm.struct_mutex); 1459 out_unpin: 1460 if (node.allocated) { 1461 wmb(); 1462 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1463 remove_mappable_node(&node); 1464 } else { 1465 i915_vma_unpin(vma); 1466 } 1467 out_rpm: 1468 intel_runtime_pm_put(i915); 1469 out_unlock: 1470 mutex_unlock(&i915->drm.struct_mutex); 1471 return ret; 1472 } 1473 1474 static int 1475 shmem_pwrite_slow(struct page *page, int offset, int length, 1476 char __user *user_data, 1477 bool page_do_bit17_swizzling, 1478 bool needs_clflush_before, 1479 bool needs_clflush_after) 1480 { 1481 char *vaddr; 1482 int ret; 1483 1484 vaddr = kmap(page); 1485 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1486 shmem_clflush_swizzled_range(vaddr + offset, length, 1487 page_do_bit17_swizzling); 1488 if (page_do_bit17_swizzling) 1489 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1490 length); 1491 else 1492 ret = __copy_from_user(vaddr + offset, user_data, length); 1493 if (needs_clflush_after) 1494 shmem_clflush_swizzled_range(vaddr + offset, length, 1495 page_do_bit17_swizzling); 1496 kunmap(page); 1497 1498 return ret ? 
-EFAULT : 0; 1499 } 1500 1501 /* Per-page copy function for the shmem pwrite fastpath. 1502 * Flushes invalid cachelines before writing to the target if 1503 * needs_clflush_before is set and flushes out any written cachelines after 1504 * writing if needs_clflush is set. 1505 */ 1506 static int 1507 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1508 bool page_do_bit17_swizzling, 1509 bool needs_clflush_before, 1510 bool needs_clflush_after) 1511 { 1512 int ret; 1513 1514 ret = -ENODEV; 1515 if (!page_do_bit17_swizzling) { 1516 char *vaddr = kmap_atomic(page); 1517 1518 if (needs_clflush_before) 1519 drm_clflush_virt_range(vaddr + offset, len); 1520 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1521 if (needs_clflush_after) 1522 drm_clflush_virt_range(vaddr + offset, len); 1523 1524 kunmap_atomic(vaddr); 1525 } 1526 if (ret == 0) 1527 return ret; 1528 1529 return shmem_pwrite_slow(page, offset, len, user_data, 1530 page_do_bit17_swizzling, 1531 needs_clflush_before, 1532 needs_clflush_after); 1533 } 1534 1535 static int 1536 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1537 const struct drm_i915_gem_pwrite *args) 1538 { 1539 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1540 void __user *user_data; 1541 u64 remain; 1542 unsigned int obj_do_bit17_swizzling; 1543 unsigned int partial_cacheline_write; 1544 unsigned int needs_clflush; 1545 unsigned int offset, idx; 1546 int ret; 1547 1548 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1549 if (ret) 1550 return ret; 1551 1552 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1553 mutex_unlock(&i915->drm.struct_mutex); 1554 if (ret) 1555 return ret; 1556 1557 obj_do_bit17_swizzling = 0; 1558 if (i915_gem_object_needs_bit17_swizzle(obj)) 1559 obj_do_bit17_swizzling = BIT(17); 1560 1561 /* If we don't overwrite a cacheline completely we need to be 1562 * careful to have up-to-date data by first clflushing. Don't 1563 * overcomplicate things and flush the entire patch. 1564 */ 1565 partial_cacheline_write = 0; 1566 if (needs_clflush & CLFLUSH_BEFORE) 1567 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1568 1569 user_data = u64_to_user_ptr(args->data_ptr); 1570 remain = args->size; 1571 offset = offset_in_page(args->offset); 1572 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1573 struct page *page = i915_gem_object_get_page(obj, idx); 1574 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1575 1576 ret = shmem_pwrite(page, offset, length, user_data, 1577 page_to_phys(page) & obj_do_bit17_swizzling, 1578 (offset | length) & partial_cacheline_write, 1579 needs_clflush & CLFLUSH_AFTER); 1580 if (ret) 1581 break; 1582 1583 remain -= length; 1584 user_data += length; 1585 offset = 0; 1586 } 1587 1588 intel_fb_obj_flush(obj, ORIGIN_CPU); 1589 i915_gem_obj_finish_shmem_access(obj); 1590 return ret; 1591 } 1592 1593 /** 1594 * Writes data to the object referenced by handle. 1595 * @dev: drm device 1596 * @data: ioctl data blob 1597 * @file: drm file 1598 * 1599 * On error, the contents of the buffer that were to be modified are undefined. 
1600 */ 1601 int 1602 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1603 struct drm_file *file) 1604 { 1605 struct drm_i915_gem_pwrite *args = data; 1606 struct drm_i915_gem_object *obj; 1607 int ret; 1608 1609 if (args->size == 0) 1610 return 0; 1611 1612 if (!access_ok(VERIFY_READ, 1613 u64_to_user_ptr(args->data_ptr), 1614 args->size)) 1615 return -EFAULT; 1616 1617 obj = i915_gem_object_lookup(file, args->handle); 1618 if (!obj) 1619 return -ENOENT; 1620 1621 /* Bounds check destination. */ 1622 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1623 ret = -EINVAL; 1624 goto err; 1625 } 1626 1627 /* Writes not allowed into this read-only object */ 1628 if (i915_gem_object_is_readonly(obj)) { 1629 ret = -EINVAL; 1630 goto err; 1631 } 1632 1633 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1634 1635 ret = -ENODEV; 1636 if (obj->ops->pwrite) 1637 ret = obj->ops->pwrite(obj, args); 1638 if (ret != -ENODEV) 1639 goto err; 1640 1641 ret = i915_gem_object_wait(obj, 1642 I915_WAIT_INTERRUPTIBLE | 1643 I915_WAIT_ALL, 1644 MAX_SCHEDULE_TIMEOUT, 1645 to_rps_client(file)); 1646 if (ret) 1647 goto err; 1648 1649 ret = i915_gem_object_pin_pages(obj); 1650 if (ret) 1651 goto err; 1652 1653 ret = -EFAULT; 1654 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1655 * it would end up going through the fenced access, and we'll get 1656 * different detiling behavior between reading and writing. 1657 * pread/pwrite currently are reading and writing from the CPU 1658 * perspective, requiring manual detiling by the client. 1659 */ 1660 if (!i915_gem_object_has_struct_page(obj) || 1661 cpu_write_needs_clflush(obj)) 1662 /* Note that the gtt paths might fail with non-page-backed user 1663 * pointers (e.g. gtt mappings when moving data between 1664 * textures). Fallback to the shmem path in that case. 1665 */ 1666 ret = i915_gem_gtt_pwrite_fast(obj, args); 1667 1668 if (ret == -EFAULT || ret == -ENOSPC) { 1669 if (obj->phys_handle) 1670 ret = i915_gem_phys_pwrite(obj, args, file); 1671 else 1672 ret = i915_gem_shmem_pwrite(obj, args); 1673 } 1674 1675 i915_gem_object_unpin_pages(obj); 1676 err: 1677 i915_gem_object_put(obj); 1678 return ret; 1679 } 1680 1681 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1682 { 1683 struct drm_i915_private *i915; 1684 struct list_head *list; 1685 struct i915_vma *vma; 1686 1687 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1688 1689 for_each_ggtt_vma(vma, obj) { 1690 if (i915_vma_is_active(vma)) 1691 continue; 1692 1693 if (!drm_mm_node_allocated(&vma->node)) 1694 continue; 1695 1696 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1697 } 1698 1699 i915 = to_i915(obj->base.dev); 1700 spin_lock(&i915->mm.obj_lock); 1701 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1702 list_move_tail(&obj->mm.link, list); 1703 spin_unlock(&i915->mm.obj_lock); 1704 } 1705 1706 /** 1707 * Called when user space prepares to use an object with the CPU, either 1708 * through the mmap ioctl's mapping or a GTT mapping. 
1709 * @dev: drm device 1710 * @data: ioctl data blob 1711 * @file: drm file 1712 */ 1713 int 1714 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1715 struct drm_file *file) 1716 { 1717 struct drm_i915_gem_set_domain *args = data; 1718 struct drm_i915_gem_object *obj; 1719 uint32_t read_domains = args->read_domains; 1720 uint32_t write_domain = args->write_domain; 1721 int err; 1722 1723 /* Only handle setting domains to types used by the CPU. */ 1724 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1725 return -EINVAL; 1726 1727 /* Having something in the write domain implies it's in the read 1728 * domain, and only that read domain. Enforce that in the request. 1729 */ 1730 if (write_domain != 0 && read_domains != write_domain) 1731 return -EINVAL; 1732 1733 obj = i915_gem_object_lookup(file, args->handle); 1734 if (!obj) 1735 return -ENOENT; 1736 1737 /* Try to flush the object off the GPU without holding the lock. 1738 * We will repeat the flush holding the lock in the normal manner 1739 * to catch cases where we are gazumped. 1740 */ 1741 err = i915_gem_object_wait(obj, 1742 I915_WAIT_INTERRUPTIBLE | 1743 I915_WAIT_PRIORITY | 1744 (write_domain ? I915_WAIT_ALL : 0), 1745 MAX_SCHEDULE_TIMEOUT, 1746 to_rps_client(file)); 1747 if (err) 1748 goto out; 1749 1750 /* 1751 * Proxy objects do not control access to the backing storage, ergo 1752 * they cannot be used as a means to manipulate the cache domain 1753 * tracking for that backing storage. The proxy object is always 1754 * considered to be outside of any cache domain. 1755 */ 1756 if (i915_gem_object_is_proxy(obj)) { 1757 err = -ENXIO; 1758 goto out; 1759 } 1760 1761 /* 1762 * Flush and acquire obj->pages so that we are coherent through 1763 * direct access in memory with previous cached writes through 1764 * shmemfs and that our cache domain tracking remains valid. 1765 * For example, if the obj->filp was moved to swap without us 1766 * being notified and releasing the pages, we would mistakenly 1767 * continue to assume that the obj remained out of the CPU cached 1768 * domain. 1769 */ 1770 err = i915_gem_object_pin_pages(obj); 1771 if (err) 1772 goto out; 1773 1774 err = i915_mutex_lock_interruptible(dev); 1775 if (err) 1776 goto out_unpin; 1777 1778 if (read_domains & I915_GEM_DOMAIN_WC) 1779 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1780 else if (read_domains & I915_GEM_DOMAIN_GTT) 1781 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1782 else 1783 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1784 1785 /* And bump the LRU for this access */ 1786 i915_gem_object_bump_inactive_ggtt(obj); 1787 1788 mutex_unlock(&dev->struct_mutex); 1789 1790 if (write_domain != 0) 1791 intel_fb_obj_invalidate(obj, 1792 fb_write_origin(obj, write_domain)); 1793 1794 out_unpin: 1795 i915_gem_object_unpin_pages(obj); 1796 out: 1797 i915_gem_object_put(obj); 1798 return err; 1799 } 1800 1801 /** 1802 * Called when user space has done writes to this buffer 1803 * @dev: drm device 1804 * @data: ioctl data blob 1805 * @file: drm file 1806 */ 1807 int 1808 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1809 struct drm_file *file) 1810 { 1811 struct drm_i915_gem_sw_finish *args = data; 1812 struct drm_i915_gem_object *obj; 1813 1814 obj = i915_gem_object_lookup(file, args->handle); 1815 if (!obj) 1816 return -ENOENT; 1817 1818 /* 1819 * Proxy objects are barred from CPU access, so there is no 1820 * need to ban sw_finish as it is a nop. 
1821 */ 1822 1823 /* Pinned buffers may be scanout, so flush the cache */ 1824 i915_gem_object_flush_if_display(obj); 1825 i915_gem_object_put(obj); 1826 1827 return 0; 1828 } 1829 1830 /** 1831 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1832 * it is mapped to. 1833 * @dev: drm device 1834 * @data: ioctl data blob 1835 * @file: drm file 1836 * 1837 * While the mapping holds a reference on the contents of the object, it doesn't 1838 * imply a ref on the object itself. 1839 * 1840 * IMPORTANT: 1841 * 1842 * DRM driver writers who look a this function as an example for how to do GEM 1843 * mmap support, please don't implement mmap support like here. The modern way 1844 * to implement DRM mmap support is with an mmap offset ioctl (like 1845 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1846 * That way debug tooling like valgrind will understand what's going on, hiding 1847 * the mmap call in a driver private ioctl will break that. The i915 driver only 1848 * does cpu mmaps this way because we didn't know better. 1849 */ 1850 int 1851 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1852 struct drm_file *file) 1853 { 1854 struct drm_i915_gem_mmap *args = data; 1855 struct drm_i915_gem_object *obj; 1856 unsigned long addr; 1857 1858 if (args->flags & ~(I915_MMAP_WC)) 1859 return -EINVAL; 1860 1861 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1862 return -ENODEV; 1863 1864 obj = i915_gem_object_lookup(file, args->handle); 1865 if (!obj) 1866 return -ENOENT; 1867 1868 /* prime objects have no backing filp to GEM mmap 1869 * pages from. 1870 */ 1871 if (!obj->base.filp) { 1872 i915_gem_object_put(obj); 1873 return -ENXIO; 1874 } 1875 1876 addr = vm_mmap(obj->base.filp, 0, args->size, 1877 PROT_READ | PROT_WRITE, MAP_SHARED, 1878 args->offset); 1879 if (args->flags & I915_MMAP_WC) { 1880 struct mm_struct *mm = current->mm; 1881 struct vm_area_struct *vma; 1882 1883 if (down_write_killable(&mm->mmap_sem)) { 1884 i915_gem_object_put(obj); 1885 return -EINTR; 1886 } 1887 vma = find_vma(mm, addr); 1888 if (vma) 1889 vma->vm_page_prot = 1890 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1891 else 1892 addr = -ENOMEM; 1893 up_write(&mm->mmap_sem); 1894 1895 /* This may race, but that's ok, it only gets set */ 1896 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1897 } 1898 i915_gem_object_put(obj); 1899 if (IS_ERR((void *)addr)) 1900 return addr; 1901 1902 args->addr_ptr = (uint64_t) addr; 1903 1904 return 0; 1905 } 1906 1907 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1908 { 1909 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1910 } 1911 1912 /** 1913 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1914 * 1915 * A history of the GTT mmap interface: 1916 * 1917 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1918 * aligned and suitable for fencing, and still fit into the available 1919 * mappable space left by the pinned display objects. A classic problem 1920 * we called the page-fault-of-doom where we would ping-pong between 1921 * two objects that could not fit inside the GTT and so the memcpy 1922 * would page one object in at the expense of the other between every 1923 * single byte. 1924 * 1925 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1926 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). 
If the 1927 * object is too large for the available space (or simply too large 1928 * for the mappable aperture!), a view is created instead and faulted 1929 * into userspace. (This view is aligned and sized appropriately for 1930 * fenced access.) 1931 * 1932 * 2 - Recognise WC as a separate cache domain so that we can flush the 1933 * delayed writes via GTT before performing direct access via WC. 1934 * 1935 * Restrictions: 1936 * 1937 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1938 * hangs on some architectures, corruption on others. An attempt to service 1939 * a GTT page fault from a snoopable object will generate a SIGBUS. 1940 * 1941 * * the object must be able to fit into RAM (physical memory, though no 1942 * limited to the mappable aperture). 1943 * 1944 * 1945 * Caveats: 1946 * 1947 * * a new GTT page fault will synchronize rendering from the GPU and flush 1948 * all data to system memory. Subsequent access will not be synchronized. 1949 * 1950 * * all mappings are revoked on runtime device suspend. 1951 * 1952 * * there are only 8, 16 or 32 fence registers to share between all users 1953 * (older machines require fence register for display and blitter access 1954 * as well). Contention of the fence registers will cause the previous users 1955 * to be unmapped and any new access will generate new page faults. 1956 * 1957 * * running out of memory while servicing a fault may generate a SIGBUS, 1958 * rather than the expected SIGSEGV. 1959 */ 1960 int i915_gem_mmap_gtt_version(void) 1961 { 1962 return 2; 1963 } 1964 1965 static inline struct i915_ggtt_view 1966 compute_partial_view(const struct drm_i915_gem_object *obj, 1967 pgoff_t page_offset, 1968 unsigned int chunk) 1969 { 1970 struct i915_ggtt_view view; 1971 1972 if (i915_gem_object_is_tiled(obj)) 1973 chunk = roundup(chunk, tile_row_pages(obj)); 1974 1975 view.type = I915_GGTT_VIEW_PARTIAL; 1976 view.partial.offset = rounddown(page_offset, chunk); 1977 view.partial.size = 1978 min_t(unsigned int, chunk, 1979 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1980 1981 /* If the partial covers the entire object, just create a normal VMA. */ 1982 if (chunk >= obj->base.size >> PAGE_SHIFT) 1983 view.type = I915_GGTT_VIEW_NORMAL; 1984 1985 return view; 1986 } 1987 1988 /** 1989 * i915_gem_fault - fault a page into the GTT 1990 * @vmf: fault info 1991 * 1992 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1993 * from userspace. The fault handler takes care of binding the object to 1994 * the GTT (if needed), allocating and programming a fence register (again, 1995 * only if needed based on whether the old reg is still valid or the object 1996 * is tiled) and inserting a new PTE into the faulting process. 1997 * 1998 * Note that the faulting process may involve evicting existing objects 1999 * from the GTT and/or fence registers to make room. So performance may 2000 * suffer if the GTT working set is large or there are few fence registers 2001 * left. 2002 * 2003 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 2004 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 
2005 */ 2006 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2007 { 2008 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2009 struct vm_area_struct *area = vmf->vma; 2010 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2011 struct drm_device *dev = obj->base.dev; 2012 struct drm_i915_private *dev_priv = to_i915(dev); 2013 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2014 bool write = area->vm_flags & VM_WRITE; 2015 struct i915_vma *vma; 2016 pgoff_t page_offset; 2017 int ret; 2018 2019 /* Sanity check that we allow writing into this object */ 2020 if (i915_gem_object_is_readonly(obj) && write) 2021 return VM_FAULT_SIGBUS; 2022 2023 /* We don't use vmf->pgoff since that has the fake offset */ 2024 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2025 2026 trace_i915_gem_object_fault(obj, page_offset, true, write); 2027 2028 /* Try to flush the object off the GPU first without holding the lock. 2029 * Upon acquiring the lock, we will perform our sanity checks and then 2030 * repeat the flush holding the lock in the normal manner to catch cases 2031 * where we are gazumped. 2032 */ 2033 ret = i915_gem_object_wait(obj, 2034 I915_WAIT_INTERRUPTIBLE, 2035 MAX_SCHEDULE_TIMEOUT, 2036 NULL); 2037 if (ret) 2038 goto err; 2039 2040 ret = i915_gem_object_pin_pages(obj); 2041 if (ret) 2042 goto err; 2043 2044 intel_runtime_pm_get(dev_priv); 2045 2046 ret = i915_mutex_lock_interruptible(dev); 2047 if (ret) 2048 goto err_rpm; 2049 2050 /* Access to snoopable pages through the GTT is incoherent. */ 2051 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2052 ret = -EFAULT; 2053 goto err_unlock; 2054 } 2055 2056 2057 /* Now pin it into the GTT as needed */ 2058 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2059 PIN_MAPPABLE | 2060 PIN_NONBLOCK | 2061 PIN_NONFAULT); 2062 if (IS_ERR(vma)) { 2063 /* Use a partial view if it is bigger than available space */ 2064 struct i915_ggtt_view view = 2065 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2066 unsigned int flags; 2067 2068 flags = PIN_MAPPABLE; 2069 if (view.type == I915_GGTT_VIEW_NORMAL) 2070 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2071 2072 /* 2073 * Userspace is now writing through an untracked VMA, abandon 2074 * all hope that the hardware is able to track future writes. 
2075 */ 2076 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2077 2078 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2079 if (IS_ERR(vma) && !view.type) { 2080 flags = PIN_MAPPABLE; 2081 view.type = I915_GGTT_VIEW_PARTIAL; 2082 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2083 } 2084 } 2085 if (IS_ERR(vma)) { 2086 ret = PTR_ERR(vma); 2087 goto err_unlock; 2088 } 2089 2090 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2091 if (ret) 2092 goto err_unpin; 2093 2094 ret = i915_vma_pin_fence(vma); 2095 if (ret) 2096 goto err_unpin; 2097 2098 /* Finally, remap it using the new GTT offset */ 2099 ret = remap_io_mapping(area, 2100 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2101 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2102 min_t(u64, vma->size, area->vm_end - area->vm_start), 2103 &ggtt->iomap); 2104 if (ret) 2105 goto err_fence; 2106 2107 /* Mark as being mmapped into userspace for later revocation */ 2108 assert_rpm_wakelock_held(dev_priv); 2109 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2110 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2111 GEM_BUG_ON(!obj->userfault_count); 2112 2113 i915_vma_set_ggtt_write(vma); 2114 2115 err_fence: 2116 i915_vma_unpin_fence(vma); 2117 err_unpin: 2118 __i915_vma_unpin(vma); 2119 err_unlock: 2120 mutex_unlock(&dev->struct_mutex); 2121 err_rpm: 2122 intel_runtime_pm_put(dev_priv); 2123 i915_gem_object_unpin_pages(obj); 2124 err: 2125 switch (ret) { 2126 case -EIO: 2127 /* 2128 * We eat errors when the gpu is terminally wedged to avoid 2129 * userspace unduly crashing (gl has no provisions for mmaps to 2130 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2131 * and so needs to be reported. 2132 */ 2133 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2134 return VM_FAULT_SIGBUS; 2135 /* else: fall through */ 2136 case -EAGAIN: 2137 /* 2138 * EAGAIN means the gpu is hung and we'll wait for the error 2139 * handler to reset everything when re-faulting in 2140 * i915_mutex_lock_interruptible. 2141 */ 2142 case 0: 2143 case -ERESTARTSYS: 2144 case -EINTR: 2145 case -EBUSY: 2146 /* 2147 * EBUSY is ok: this just means that another thread 2148 * already did the job. 2149 */ 2150 return VM_FAULT_NOPAGE; 2151 case -ENOMEM: 2152 return VM_FAULT_OOM; 2153 case -ENOSPC: 2154 case -EFAULT: 2155 return VM_FAULT_SIGBUS; 2156 default: 2157 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2158 return VM_FAULT_SIGBUS; 2159 } 2160 } 2161 2162 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2163 { 2164 struct i915_vma *vma; 2165 2166 GEM_BUG_ON(!obj->userfault_count); 2167 2168 obj->userfault_count = 0; 2169 list_del(&obj->userfault_link); 2170 drm_vma_node_unmap(&obj->base.vma_node, 2171 obj->base.dev->anon_inode->i_mapping); 2172 2173 for_each_ggtt_vma(vma, obj) 2174 i915_vma_unset_userfault(vma); 2175 } 2176 2177 /** 2178 * i915_gem_release_mmap - remove physical page mappings 2179 * @obj: obj in question 2180 * 2181 * Preserve the reservation of the mmapping with the DRM core code, but 2182 * relinquish ownership of the pages back to the system. 2183 * 2184 * It is vital that we remove the page mapping if we have mapped a tiled 2185 * object through the GTT and then lose the fence register due to 2186 * resource pressure. Similarly if the object has been moved out of the 2187 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2188 * mapping will then trigger a page fault on the next user access, allowing 2189 * fixup by i915_gem_fault(). 2190 */ 2191 void 2192 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2193 { 2194 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2195 2196 /* Serialisation between user GTT access and our code depends upon 2197 * revoking the CPU's PTE whilst the mutex is held. The next user 2198 * pagefault then has to wait until we release the mutex. 2199 * 2200 * Note that RPM complicates somewhat by adding an additional 2201 * requirement that operations to the GGTT be made holding the RPM 2202 * wakeref. 2203 */ 2204 lockdep_assert_held(&i915->drm.struct_mutex); 2205 intel_runtime_pm_get(i915); 2206 2207 if (!obj->userfault_count) 2208 goto out; 2209 2210 __i915_gem_object_release_mmap(obj); 2211 2212 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2213 * memory transactions from userspace before we return. The TLB 2214 * flushing implied above by changing the PTE above *should* be 2215 * sufficient, an extra barrier here just provides us with a bit 2216 * of paranoid documentation about our requirement to serialise 2217 * memory writes before touching registers / GSM. 2218 */ 2219 wmb(); 2220 2221 out: 2222 intel_runtime_pm_put(i915); 2223 } 2224 2225 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2226 { 2227 struct drm_i915_gem_object *obj, *on; 2228 int i; 2229 2230 /* 2231 * Only called during RPM suspend. All users of the userfault_list 2232 * must be holding an RPM wakeref to ensure that this can not 2233 * run concurrently with themselves (and use the struct_mutex for 2234 * protection between themselves). 2235 */ 2236 2237 list_for_each_entry_safe(obj, on, 2238 &dev_priv->mm.userfault_list, userfault_link) 2239 __i915_gem_object_release_mmap(obj); 2240 2241 /* The fence will be lost when the device powers down. If any were 2242 * in use by hardware (i.e. they are pinned), we should not be powering 2243 * down! All other fences will be reacquired by the user upon waking. 2244 */ 2245 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2246 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2247 2248 /* Ideally we want to assert that the fence register is not 2249 * live at this point (i.e. that no piece of code will be 2250 * trying to write through fence + GTT, as that both violates 2251 * our tracking of activity and associated locking/barriers, 2252 * but also is illegal given that the hw is powered down). 2253 * 2254 * Previously we used reg->pin_count as a "liveness" indicator. 2255 * That is not sufficient, and we need a more fine-grained 2256 * tool if we want to have a sanity check here. 
2257 */ 2258 2259 if (!reg->vma) 2260 continue; 2261 2262 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2263 reg->dirty = true; 2264 } 2265 } 2266 2267 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2268 { 2269 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2270 int err; 2271 2272 err = drm_gem_create_mmap_offset(&obj->base); 2273 if (likely(!err)) 2274 return 0; 2275 2276 /* Attempt to reap some mmap space from dead objects */ 2277 do { 2278 err = i915_gem_wait_for_idle(dev_priv, 2279 I915_WAIT_INTERRUPTIBLE, 2280 MAX_SCHEDULE_TIMEOUT); 2281 if (err) 2282 break; 2283 2284 i915_gem_drain_freed_objects(dev_priv); 2285 err = drm_gem_create_mmap_offset(&obj->base); 2286 if (!err) 2287 break; 2288 2289 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2290 2291 return err; 2292 } 2293 2294 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2295 { 2296 drm_gem_free_mmap_offset(&obj->base); 2297 } 2298 2299 int 2300 i915_gem_mmap_gtt(struct drm_file *file, 2301 struct drm_device *dev, 2302 uint32_t handle, 2303 uint64_t *offset) 2304 { 2305 struct drm_i915_gem_object *obj; 2306 int ret; 2307 2308 obj = i915_gem_object_lookup(file, handle); 2309 if (!obj) 2310 return -ENOENT; 2311 2312 ret = i915_gem_object_create_mmap_offset(obj); 2313 if (ret == 0) 2314 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2315 2316 i915_gem_object_put(obj); 2317 return ret; 2318 } 2319 2320 /** 2321 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2322 * @dev: DRM device 2323 * @data: GTT mapping ioctl data 2324 * @file: GEM object info 2325 * 2326 * Simply returns the fake offset to userspace so it can mmap it. 2327 * The mmap call will end up in drm_gem_mmap(), which will set things 2328 * up so we can get faults in the handler above. 2329 * 2330 * The fault handler will take care of binding the object into the GTT 2331 * (since it may have been evicted to make room for something), allocating 2332 * a fence register, and mapping the appropriate aperture address into 2333 * userspace. 2334 */ 2335 int 2336 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2337 struct drm_file *file) 2338 { 2339 struct drm_i915_gem_mmap_gtt *args = data; 2340 2341 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2342 } 2343 2344 /* Immediately discard the backing storage */ 2345 static void 2346 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2347 { 2348 i915_gem_object_free_mmap_offset(obj); 2349 2350 if (obj->base.filp == NULL) 2351 return; 2352 2353 /* Our goal here is to return as much of the memory as 2354 * is possible back to the system as we are called from OOM. 2355 * To do this we must instruct the shmfs to drop all of its 2356 * backing pages, *now*. 
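	 *
	 * (Purely illustrative and not part of this function: an object
	 * normally becomes eligible for this truncation after userspace has
	 * marked it purgeable. A hedged userspace sketch, assuming a GEM
	 * handle in 'handle' and an open DRM fd in 'fd':
	 *
	 *	struct drm_i915_gem_madvise arg = {
	 *		.handle = handle,
	 *		.madv = I915_MADV_DONTNEED,
	 *	};
	 *
	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg);
	 *
	 * where arg.retained == 0 on return means the backing store has
	 * already been discarded and the old contents are gone.)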
2357 */ 2358 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2359 obj->mm.madv = __I915_MADV_PURGED; 2360 obj->mm.pages = ERR_PTR(-EFAULT); 2361 } 2362 2363 /* Try to discard unwanted pages */ 2364 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2365 { 2366 struct address_space *mapping; 2367 2368 lockdep_assert_held(&obj->mm.lock); 2369 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2370 2371 switch (obj->mm.madv) { 2372 case I915_MADV_DONTNEED: 2373 i915_gem_object_truncate(obj); 2374 case __I915_MADV_PURGED: 2375 return; 2376 } 2377 2378 if (obj->base.filp == NULL) 2379 return; 2380 2381 mapping = obj->base.filp->f_mapping, 2382 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2383 } 2384 2385 static void 2386 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2387 struct sg_table *pages) 2388 { 2389 struct sgt_iter sgt_iter; 2390 struct page *page; 2391 2392 __i915_gem_object_release_shmem(obj, pages, true); 2393 2394 i915_gem_gtt_finish_pages(obj, pages); 2395 2396 if (i915_gem_object_needs_bit17_swizzle(obj)) 2397 i915_gem_object_save_bit_17_swizzle(obj, pages); 2398 2399 for_each_sgt_page(page, sgt_iter, pages) { 2400 if (obj->mm.dirty) 2401 set_page_dirty(page); 2402 2403 if (obj->mm.madv == I915_MADV_WILLNEED) 2404 mark_page_accessed(page); 2405 2406 put_page(page); 2407 } 2408 obj->mm.dirty = false; 2409 2410 sg_free_table(pages); 2411 kfree(pages); 2412 } 2413 2414 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2415 { 2416 struct radix_tree_iter iter; 2417 void __rcu **slot; 2418 2419 rcu_read_lock(); 2420 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2421 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2422 rcu_read_unlock(); 2423 } 2424 2425 static struct sg_table * 2426 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2427 { 2428 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2429 struct sg_table *pages; 2430 2431 pages = fetch_and_zero(&obj->mm.pages); 2432 if (!pages) 2433 return NULL; 2434 2435 spin_lock(&i915->mm.obj_lock); 2436 list_del(&obj->mm.link); 2437 spin_unlock(&i915->mm.obj_lock); 2438 2439 if (obj->mm.mapping) { 2440 void *ptr; 2441 2442 ptr = page_mask_bits(obj->mm.mapping); 2443 if (is_vmalloc_addr(ptr)) 2444 vunmap(ptr); 2445 else 2446 kunmap(kmap_to_page(ptr)); 2447 2448 obj->mm.mapping = NULL; 2449 } 2450 2451 __i915_gem_object_reset_page_iter(obj); 2452 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2453 2454 return pages; 2455 } 2456 2457 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2458 enum i915_mm_subclass subclass) 2459 { 2460 struct sg_table *pages; 2461 2462 if (i915_gem_object_has_pinned_pages(obj)) 2463 return; 2464 2465 GEM_BUG_ON(obj->bind_count); 2466 if (!i915_gem_object_has_pages(obj)) 2467 return; 2468 2469 /* May be called by shrinker from within get_pages() (on another bo) */ 2470 mutex_lock_nested(&obj->mm.lock, subclass); 2471 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2472 goto unlock; 2473 2474 /* 2475 * ->put_pages might need to allocate memory for the bit17 swizzle 2476 * array, hence protect them from being reaped by removing them from gtt 2477 * lists early. 
2478 */ 2479 pages = __i915_gem_object_unset_pages(obj); 2480 if (!IS_ERR(pages)) 2481 obj->ops->put_pages(obj, pages); 2482 2483 unlock: 2484 mutex_unlock(&obj->mm.lock); 2485 } 2486 2487 bool i915_sg_trim(struct sg_table *orig_st) 2488 { 2489 struct sg_table new_st; 2490 struct scatterlist *sg, *new_sg; 2491 unsigned int i; 2492 2493 if (orig_st->nents == orig_st->orig_nents) 2494 return false; 2495 2496 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2497 return false; 2498 2499 new_sg = new_st.sgl; 2500 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2501 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2502 sg_dma_address(new_sg) = sg_dma_address(sg); 2503 sg_dma_len(new_sg) = sg_dma_len(sg); 2504 2505 new_sg = sg_next(new_sg); 2506 } 2507 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2508 2509 sg_free_table(orig_st); 2510 2511 *orig_st = new_st; 2512 return true; 2513 } 2514 2515 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2516 { 2517 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2518 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2519 unsigned long i; 2520 struct address_space *mapping; 2521 struct sg_table *st; 2522 struct scatterlist *sg; 2523 struct sgt_iter sgt_iter; 2524 struct page *page; 2525 unsigned long last_pfn = 0; /* suppress gcc warning */ 2526 unsigned int max_segment = i915_sg_segment_size(); 2527 unsigned int sg_page_sizes; 2528 gfp_t noreclaim; 2529 int ret; 2530 2531 /* 2532 * Assert that the object is not currently in any GPU domain. As it 2533 * wasn't in the GTT, there shouldn't be any way it could have been in 2534 * a GPU cache 2535 */ 2536 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2537 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2538 2539 /* 2540 * If there's no chance of allocating enough pages for the whole 2541 * object, bail early. 2542 */ 2543 if (page_count > totalram_pages) 2544 return -ENOMEM; 2545 2546 st = kmalloc(sizeof(*st), GFP_KERNEL); 2547 if (st == NULL) 2548 return -ENOMEM; 2549 2550 rebuild_st: 2551 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2552 kfree(st); 2553 return -ENOMEM; 2554 } 2555 2556 /* 2557 * Get the list of pages out of our struct file. They'll be pinned 2558 * at this point until we release them. 2559 * 2560 * Fail silently without starting the shrinker 2561 */ 2562 mapping = obj->base.filp->f_mapping; 2563 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2564 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2565 2566 sg = st->sgl; 2567 st->nents = 0; 2568 sg_page_sizes = 0; 2569 for (i = 0; i < page_count; i++) { 2570 const unsigned int shrink[] = { 2571 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2572 0, 2573 }, *s = shrink; 2574 gfp_t gfp = noreclaim; 2575 2576 do { 2577 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2578 if (likely(!IS_ERR(page))) 2579 break; 2580 2581 if (!*s) { 2582 ret = PTR_ERR(page); 2583 goto err_sg; 2584 } 2585 2586 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2587 cond_resched(); 2588 2589 /* 2590 * We've tried hard to allocate the memory by reaping 2591 * our own buffer, now let the real VM do its job and 2592 * go down in flames if truly OOM. 2593 * 2594 * However, since graphics tend to be disposable, 2595 * defer the oom here by reporting the ENOMEM back 2596 * to userspace. 
2597 */ 2598 if (!*s) { 2599 /* reclaim and warn, but no oom */ 2600 gfp = mapping_gfp_mask(mapping); 2601 2602 /* 2603 * Our bo are always dirty and so we require 2604 * kswapd to reclaim our pages (direct reclaim 2605 * does not effectively begin pageout of our 2606 * buffers on its own). However, direct reclaim 2607 * only waits for kswapd when under allocation 2608 * congestion. So as a result __GFP_RECLAIM is 2609 * unreliable and fails to actually reclaim our 2610 * dirty pages -- unless you try over and over 2611 * again with !__GFP_NORETRY. However, we still 2612 * want to fail this allocation rather than 2613 * trigger the out-of-memory killer and for 2614 * this we want __GFP_RETRY_MAYFAIL. 2615 */ 2616 gfp |= __GFP_RETRY_MAYFAIL; 2617 } 2618 } while (1); 2619 2620 if (!i || 2621 sg->length >= max_segment || 2622 page_to_pfn(page) != last_pfn + 1) { 2623 if (i) { 2624 sg_page_sizes |= sg->length; 2625 sg = sg_next(sg); 2626 } 2627 st->nents++; 2628 sg_set_page(sg, page, PAGE_SIZE, 0); 2629 } else { 2630 sg->length += PAGE_SIZE; 2631 } 2632 last_pfn = page_to_pfn(page); 2633 2634 /* Check that the i965g/gm workaround works. */ 2635 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2636 } 2637 if (sg) { /* loop terminated early; short sg table */ 2638 sg_page_sizes |= sg->length; 2639 sg_mark_end(sg); 2640 } 2641 2642 /* Trim unused sg entries to avoid wasting memory. */ 2643 i915_sg_trim(st); 2644 2645 ret = i915_gem_gtt_prepare_pages(obj, st); 2646 if (ret) { 2647 /* 2648 * DMA remapping failed? One possible cause is that 2649 * it could not reserve enough large entries, asking 2650 * for PAGE_SIZE chunks instead may be helpful. 2651 */ 2652 if (max_segment > PAGE_SIZE) { 2653 for_each_sgt_page(page, sgt_iter, st) 2654 put_page(page); 2655 sg_free_table(st); 2656 2657 max_segment = PAGE_SIZE; 2658 goto rebuild_st; 2659 } else { 2660 dev_warn(&dev_priv->drm.pdev->dev, 2661 "Failed to DMA remap %lu pages\n", 2662 page_count); 2663 goto err_pages; 2664 } 2665 } 2666 2667 if (i915_gem_object_needs_bit17_swizzle(obj)) 2668 i915_gem_object_do_bit_17_swizzle(obj, st); 2669 2670 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2671 2672 return 0; 2673 2674 err_sg: 2675 sg_mark_end(sg); 2676 err_pages: 2677 for_each_sgt_page(page, sgt_iter, st) 2678 put_page(page); 2679 sg_free_table(st); 2680 kfree(st); 2681 2682 /* 2683 * shmemfs first checks if there is enough memory to allocate the page 2684 * and reports ENOSPC should there be insufficient, along with the usual 2685 * ENOMEM for a genuine allocation failure. 2686 * 2687 * We use ENOSPC in our driver to mean that we have run out of aperture 2688 * space and so want to translate the error from shmemfs back to our 2689 * usual understanding of ENOMEM. 
2690 */ 2691 if (ret == -ENOSPC) 2692 ret = -ENOMEM; 2693 2694 return ret; 2695 } 2696 2697 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2698 struct sg_table *pages, 2699 unsigned int sg_page_sizes) 2700 { 2701 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2702 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2703 int i; 2704 2705 lockdep_assert_held(&obj->mm.lock); 2706 2707 obj->mm.get_page.sg_pos = pages->sgl; 2708 obj->mm.get_page.sg_idx = 0; 2709 2710 obj->mm.pages = pages; 2711 2712 if (i915_gem_object_is_tiled(obj) && 2713 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2714 GEM_BUG_ON(obj->mm.quirked); 2715 __i915_gem_object_pin_pages(obj); 2716 obj->mm.quirked = true; 2717 } 2718 2719 GEM_BUG_ON(!sg_page_sizes); 2720 obj->mm.page_sizes.phys = sg_page_sizes; 2721 2722 /* 2723 * Calculate the supported page-sizes which fit into the given 2724 * sg_page_sizes. This will give us the page-sizes which we may be able 2725 * to use opportunistically when later inserting into the GTT. For 2726 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2727 * 64K or 4K pages, although in practice this will depend on a number of 2728 * other factors. 2729 */ 2730 obj->mm.page_sizes.sg = 0; 2731 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2732 if (obj->mm.page_sizes.phys & ~0u << i) 2733 obj->mm.page_sizes.sg |= BIT(i); 2734 } 2735 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2736 2737 spin_lock(&i915->mm.obj_lock); 2738 list_add(&obj->mm.link, &i915->mm.unbound_list); 2739 spin_unlock(&i915->mm.obj_lock); 2740 } 2741 2742 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2743 { 2744 int err; 2745 2746 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2747 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2748 return -EFAULT; 2749 } 2750 2751 err = obj->ops->get_pages(obj); 2752 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2753 2754 return err; 2755 } 2756 2757 /* Ensure that the associated pages are gathered from the backing storage 2758 * and pinned into our object. i915_gem_object_pin_pages() may be called 2759 * multiple times before they are released by a single call to 2760 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2761 * either as a result of memory pressure (reaping pages under the shrinker) 2762 * or as the object is itself released. 
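 *
 * A minimal usage sketch (for illustration only) of the balanced pin/unpin
 * pattern described above:
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access the backing store via obj->mm.pages ...
 *
 *	i915_gem_object_unpin_pages(obj);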
2763 */ 2764 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2765 { 2766 int err; 2767 2768 err = mutex_lock_interruptible(&obj->mm.lock); 2769 if (err) 2770 return err; 2771 2772 if (unlikely(!i915_gem_object_has_pages(obj))) { 2773 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2774 2775 err = ____i915_gem_object_get_pages(obj); 2776 if (err) 2777 goto unlock; 2778 2779 smp_mb__before_atomic(); 2780 } 2781 atomic_inc(&obj->mm.pages_pin_count); 2782 2783 unlock: 2784 mutex_unlock(&obj->mm.lock); 2785 return err; 2786 } 2787 2788 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2789 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2790 enum i915_map_type type) 2791 { 2792 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2793 struct sg_table *sgt = obj->mm.pages; 2794 struct sgt_iter sgt_iter; 2795 struct page *page; 2796 struct page *stack_pages[32]; 2797 struct page **pages = stack_pages; 2798 unsigned long i = 0; 2799 pgprot_t pgprot; 2800 void *addr; 2801 2802 /* A single page can always be kmapped */ 2803 if (n_pages == 1 && type == I915_MAP_WB) 2804 return kmap(sg_page(sgt->sgl)); 2805 2806 if (n_pages > ARRAY_SIZE(stack_pages)) { 2807 /* Too big for stack -- allocate temporary array instead */ 2808 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2809 if (!pages) 2810 return NULL; 2811 } 2812 2813 for_each_sgt_page(page, sgt_iter, sgt) 2814 pages[i++] = page; 2815 2816 /* Check that we have the expected number of pages */ 2817 GEM_BUG_ON(i != n_pages); 2818 2819 switch (type) { 2820 default: 2821 MISSING_CASE(type); 2822 /* fallthrough to use PAGE_KERNEL anyway */ 2823 case I915_MAP_WB: 2824 pgprot = PAGE_KERNEL; 2825 break; 2826 case I915_MAP_WC: 2827 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2828 break; 2829 } 2830 addr = vmap(pages, n_pages, 0, pgprot); 2831 2832 if (pages != stack_pages) 2833 kvfree(pages); 2834 2835 return addr; 2836 } 2837 2838 /* get, pin, and map the pages of the object into kernel space */ 2839 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2840 enum i915_map_type type) 2841 { 2842 enum i915_map_type has_type; 2843 bool pinned; 2844 void *ptr; 2845 int ret; 2846 2847 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2848 return ERR_PTR(-ENXIO); 2849 2850 ret = mutex_lock_interruptible(&obj->mm.lock); 2851 if (ret) 2852 return ERR_PTR(ret); 2853 2854 pinned = !(type & I915_MAP_OVERRIDE); 2855 type &= ~I915_MAP_OVERRIDE; 2856 2857 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2858 if (unlikely(!i915_gem_object_has_pages(obj))) { 2859 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2860 2861 ret = ____i915_gem_object_get_pages(obj); 2862 if (ret) 2863 goto err_unlock; 2864 2865 smp_mb__before_atomic(); 2866 } 2867 atomic_inc(&obj->mm.pages_pin_count); 2868 pinned = false; 2869 } 2870 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2871 2872 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2873 if (ptr && has_type != type) { 2874 if (pinned) { 2875 ret = -EBUSY; 2876 goto err_unpin; 2877 } 2878 2879 if (is_vmalloc_addr(ptr)) 2880 vunmap(ptr); 2881 else 2882 kunmap(kmap_to_page(ptr)); 2883 2884 ptr = obj->mm.mapping = NULL; 2885 } 2886 2887 if (!ptr) { 2888 ptr = i915_gem_object_map(obj, type); 2889 if (!ptr) { 2890 ret = -ENOMEM; 2891 goto err_unpin; 2892 } 2893 2894 obj->mm.mapping = page_pack_bits(ptr, type); 2895 } 2896 2897 out_unlock: 2898 mutex_unlock(&obj->mm.lock); 2899 return ptr; 2900 2901 err_unpin: 2902 atomic_dec(&obj->mm.pages_pin_count); 2903 
err_unlock: 2904 ptr = ERR_PTR(ret); 2905 goto out_unlock; 2906 } 2907 2908 static int 2909 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2910 const struct drm_i915_gem_pwrite *arg) 2911 { 2912 struct address_space *mapping = obj->base.filp->f_mapping; 2913 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2914 u64 remain, offset; 2915 unsigned int pg; 2916 2917 /* Before we instantiate/pin the backing store for our use, we 2918 * can prepopulate the shmemfs filp efficiently using a write into 2919 * the pagecache. We avoid the penalty of instantiating all the 2920 * pages, important if the user is just writing to a few and never 2921 * uses the object on the GPU, and using a direct write into shmemfs 2922 * allows it to avoid the cost of retrieving a page (either swapin 2923 * or clearing-before-use) before it is overwritten. 2924 */ 2925 if (i915_gem_object_has_pages(obj)) 2926 return -ENODEV; 2927 2928 if (obj->mm.madv != I915_MADV_WILLNEED) 2929 return -EFAULT; 2930 2931 /* Before the pages are instantiated the object is treated as being 2932 * in the CPU domain. The pages will be clflushed as required before 2933 * use, and we can freely write into the pages directly. If userspace 2934 * races pwrite with any other operation; corruption will ensue - 2935 * that is userspace's prerogative! 2936 */ 2937 2938 remain = arg->size; 2939 offset = arg->offset; 2940 pg = offset_in_page(offset); 2941 2942 do { 2943 unsigned int len, unwritten; 2944 struct page *page; 2945 void *data, *vaddr; 2946 int err; 2947 2948 len = PAGE_SIZE - pg; 2949 if (len > remain) 2950 len = remain; 2951 2952 err = pagecache_write_begin(obj->base.filp, mapping, 2953 offset, len, 0, 2954 &page, &data); 2955 if (err < 0) 2956 return err; 2957 2958 vaddr = kmap(page); 2959 unwritten = copy_from_user(vaddr + pg, user_data, len); 2960 kunmap(page); 2961 2962 err = pagecache_write_end(obj->base.filp, mapping, 2963 offset, len, len - unwritten, 2964 page, data); 2965 if (err < 0) 2966 return err; 2967 2968 if (unwritten) 2969 return -EFAULT; 2970 2971 remain -= len; 2972 user_data += len; 2973 offset += len; 2974 pg = 0; 2975 } while (remain); 2976 2977 return 0; 2978 } 2979 2980 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 2981 const struct i915_gem_context *ctx) 2982 { 2983 unsigned int score; 2984 unsigned long prev_hang; 2985 2986 if (i915_gem_context_is_banned(ctx)) 2987 score = I915_CLIENT_SCORE_CONTEXT_BAN; 2988 else 2989 score = 0; 2990 2991 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 2992 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 2993 score += I915_CLIENT_SCORE_HANG_FAST; 2994 2995 if (score) { 2996 atomic_add(score, &file_priv->ban_score); 2997 2998 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 2999 ctx->name, score, 3000 atomic_read(&file_priv->ban_score)); 3001 } 3002 } 3003 3004 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3005 { 3006 unsigned int score; 3007 bool banned, bannable; 3008 3009 atomic_inc(&ctx->guilty_count); 3010 3011 bannable = i915_gem_context_is_bannable(ctx); 3012 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3013 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 3014 3015 /* Cool contexts don't accumulate client ban score */ 3016 if (!bannable) 3017 return; 3018 3019 if (banned) { 3020 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3021 ctx->name, atomic_read(&ctx->guilty_count), 3022 score); 3023 
i915_gem_context_set_banned(ctx); 3024 } 3025 3026 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3027 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3028 } 3029 3030 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3031 { 3032 atomic_inc(&ctx->active_count); 3033 } 3034 3035 struct i915_request * 3036 i915_gem_find_active_request(struct intel_engine_cs *engine) 3037 { 3038 struct i915_request *request, *active = NULL; 3039 unsigned long flags; 3040 3041 /* 3042 * We are called by the error capture, reset and to dump engine 3043 * state at random points in time. In particular, note that neither is 3044 * crucially ordered with an interrupt. After a hang, the GPU is dead 3045 * and we assume that no more writes can happen (we waited long enough 3046 * for all writes that were in transaction to be flushed) - adding an 3047 * extra delay for a recent interrupt is pointless. Hence, we do 3048 * not need an engine->irq_seqno_barrier() before the seqno reads. 3049 * At all other times, we must assume the GPU is still running, but 3050 * we only care about the snapshot of this moment. 3051 */ 3052 spin_lock_irqsave(&engine->timeline.lock, flags); 3053 list_for_each_entry(request, &engine->timeline.requests, link) { 3054 if (__i915_request_completed(request, request->global_seqno)) 3055 continue; 3056 3057 active = request; 3058 break; 3059 } 3060 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3061 3062 return active; 3063 } 3064 3065 /* 3066 * Ensure irq handler finishes, and not run again. 3067 * Also return the active request so that we only search for it once. 3068 */ 3069 struct i915_request * 3070 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3071 { 3072 struct i915_request *request; 3073 3074 /* 3075 * During the reset sequence, we must prevent the engine from 3076 * entering RC6. As the context state is undefined until we restart 3077 * the engine, if it does enter RC6 during the reset, the state 3078 * written to the powercontext is undefined and so we may lose 3079 * GPU state upon resume, i.e. fail to restart after a reset. 3080 */ 3081 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3082 3083 request = engine->reset.prepare(engine); 3084 if (request && request->fence.error == -EIO) 3085 request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ 3086 3087 return request; 3088 } 3089 3090 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3091 { 3092 struct intel_engine_cs *engine; 3093 struct i915_request *request; 3094 enum intel_engine_id id; 3095 int err = 0; 3096 3097 for_each_engine(engine, dev_priv, id) { 3098 request = i915_gem_reset_prepare_engine(engine); 3099 if (IS_ERR(request)) { 3100 err = PTR_ERR(request); 3101 continue; 3102 } 3103 3104 engine->hangcheck.active_request = request; 3105 } 3106 3107 i915_gem_revoke_fences(dev_priv); 3108 intel_uc_sanitize(dev_priv); 3109 3110 return err; 3111 } 3112 3113 static void engine_skip_context(struct i915_request *request) 3114 { 3115 struct intel_engine_cs *engine = request->engine; 3116 struct i915_gem_context *hung_ctx = request->gem_context; 3117 struct i915_timeline *timeline = request->timeline; 3118 unsigned long flags; 3119 3120 GEM_BUG_ON(timeline == &engine->timeline); 3121 3122 spin_lock_irqsave(&engine->timeline.lock, flags); 3123 spin_lock(&timeline->lock); 3124 3125 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3126 if (request->gem_context == hung_ctx) 3127 i915_request_skip(request, -EIO); 3128 3129 list_for_each_entry(request, &timeline->requests, link) 3130 i915_request_skip(request, -EIO); 3131 3132 spin_unlock(&timeline->lock); 3133 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3134 } 3135 3136 /* Returns the request if it was guilty of the hang */ 3137 static struct i915_request * 3138 i915_gem_reset_request(struct intel_engine_cs *engine, 3139 struct i915_request *request, 3140 bool stalled) 3141 { 3142 /* The guilty request will get skipped on a hung engine. 3143 * 3144 * Users of client default contexts do not rely on logical 3145 * state preserved between batches so it is safe to execute 3146 * queued requests following the hang. Non default contexts 3147 * rely on preserved state, so skipping a batch loses the 3148 * evolution of the state and it needs to be considered corrupted. 3149 * Executing more queued batches on top of corrupted state is 3150 * risky. But we take the risk by trying to advance through 3151 * the queued requests in order to make the client behaviour 3152 * more predictable around resets, by not throwing away random 3153 * amount of batches it has prepared for execution. Sophisticated 3154 * clients can use gem_reset_stats_ioctl and dma fence status 3155 * (exported via sync_file info ioctl on explicit fences) to observe 3156 * when it loses the context state and should rebuild accordingly. 3157 * 3158 * The context ban, and ultimately the client ban, mechanism are safety 3159 * valves if client submission ends up resulting in nothing more than 3160 * subsequent hangs. 3161 */ 3162 3163 if (i915_request_completed(request)) { 3164 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3165 engine->name, request->global_seqno, 3166 request->fence.context, request->fence.seqno, 3167 intel_engine_get_seqno(engine)); 3168 stalled = false; 3169 } 3170 3171 if (stalled) { 3172 i915_gem_context_mark_guilty(request->gem_context); 3173 i915_request_skip(request, -EIO); 3174 3175 /* If this context is now banned, skip all pending requests. */ 3176 if (i915_gem_context_is_banned(request->gem_context)) 3177 engine_skip_context(request); 3178 } else { 3179 /* 3180 * Since this is not the hung engine, it may have advanced 3181 * since the hang declaration. Double check by refinding 3182 * the active request at the time of the reset. 
3183 */ 3184 request = i915_gem_find_active_request(engine); 3185 if (request) { 3186 unsigned long flags; 3187 3188 i915_gem_context_mark_innocent(request->gem_context); 3189 dma_fence_set_error(&request->fence, -EAGAIN); 3190 3191 /* Rewind the engine to replay the incomplete rq */ 3192 spin_lock_irqsave(&engine->timeline.lock, flags); 3193 request = list_prev_entry(request, link); 3194 if (&request->link == &engine->timeline.requests) 3195 request = NULL; 3196 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3197 } 3198 } 3199 3200 return request; 3201 } 3202 3203 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3204 struct i915_request *request, 3205 bool stalled) 3206 { 3207 /* 3208 * Make sure this write is visible before we re-enable the interrupt 3209 * handlers on another CPU, as tasklet_enable() resolves to just 3210 * a compiler barrier which is insufficient for our purpose here. 3211 */ 3212 smp_store_mb(engine->irq_posted, 0); 3213 3214 if (request) 3215 request = i915_gem_reset_request(engine, request, stalled); 3216 3217 /* Setup the CS to resume from the breadcrumb of the hung request */ 3218 engine->reset.reset(engine, request); 3219 } 3220 3221 void i915_gem_reset(struct drm_i915_private *dev_priv, 3222 unsigned int stalled_mask) 3223 { 3224 struct intel_engine_cs *engine; 3225 enum intel_engine_id id; 3226 3227 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3228 3229 i915_retire_requests(dev_priv); 3230 3231 for_each_engine(engine, dev_priv, id) { 3232 struct intel_context *ce; 3233 3234 i915_gem_reset_engine(engine, 3235 engine->hangcheck.active_request, 3236 stalled_mask & ENGINE_MASK(id)); 3237 ce = fetch_and_zero(&engine->last_retired_context); 3238 if (ce) 3239 intel_context_unpin(ce); 3240 3241 /* 3242 * Ostensibily, we always want a context loaded for powersaving, 3243 * so if the engine is idle after the reset, send a request 3244 * to load our scratch kernel_context. 3245 * 3246 * More mysteriously, if we leave the engine idle after a reset, 3247 * the next userspace batch may hang, with what appears to be 3248 * an incoherent read by the CS (presumably stale TLB). An 3249 * empty request appears sufficient to paper over the glitch. 
3250 */ 3251 if (intel_engine_is_idle(engine)) { 3252 struct i915_request *rq; 3253 3254 rq = i915_request_alloc(engine, 3255 dev_priv->kernel_context); 3256 if (!IS_ERR(rq)) 3257 i915_request_add(rq); 3258 } 3259 } 3260 3261 i915_gem_restore_fences(dev_priv); 3262 } 3263 3264 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3265 { 3266 engine->reset.finish(engine); 3267 3268 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3269 } 3270 3271 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3272 { 3273 struct intel_engine_cs *engine; 3274 enum intel_engine_id id; 3275 3276 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3277 3278 for_each_engine(engine, dev_priv, id) { 3279 engine->hangcheck.active_request = NULL; 3280 i915_gem_reset_finish_engine(engine); 3281 } 3282 } 3283 3284 static void nop_submit_request(struct i915_request *request) 3285 { 3286 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3287 request->engine->name, 3288 request->fence.context, request->fence.seqno); 3289 dma_fence_set_error(&request->fence, -EIO); 3290 3291 i915_request_submit(request); 3292 } 3293 3294 static void nop_complete_submit_request(struct i915_request *request) 3295 { 3296 unsigned long flags; 3297 3298 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3299 request->engine->name, 3300 request->fence.context, request->fence.seqno); 3301 dma_fence_set_error(&request->fence, -EIO); 3302 3303 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3304 __i915_request_submit(request); 3305 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3306 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3307 } 3308 3309 void i915_gem_set_wedged(struct drm_i915_private *i915) 3310 { 3311 struct intel_engine_cs *engine; 3312 enum intel_engine_id id; 3313 3314 GEM_TRACE("start\n"); 3315 3316 if (GEM_SHOW_DEBUG()) { 3317 struct drm_printer p = drm_debug_printer(__func__); 3318 3319 for_each_engine(engine, i915, id) 3320 intel_engine_dump(engine, &p, "%s\n", engine->name); 3321 } 3322 3323 if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) 3324 goto out; 3325 3326 /* 3327 * First, stop submission to hw, but do not yet complete requests by 3328 * rolling the global seqno forward (since this would complete requests 3329 * for which we haven't set the fence error to EIO yet). 3330 */ 3331 for_each_engine(engine, i915, id) { 3332 i915_gem_reset_prepare_engine(engine); 3333 3334 engine->submit_request = nop_submit_request; 3335 engine->schedule = NULL; 3336 } 3337 i915->caps.scheduler = 0; 3338 3339 /* Even if the GPU reset fails, it should still stop the engines */ 3340 if (INTEL_GEN(i915) >= 5) 3341 intel_gpu_reset(i915, ALL_ENGINES); 3342 3343 /* 3344 * Make sure no one is running the old callback before we proceed with 3345 * cancelling requests and resetting the completion tracking. Otherwise 3346 * we might submit a request to the hardware which never completes. 3347 */ 3348 synchronize_rcu(); 3349 3350 for_each_engine(engine, i915, id) { 3351 /* Mark all executing requests as skipped */ 3352 engine->cancel_requests(engine); 3353 3354 /* 3355 * Only once we've force-cancelled all in-flight requests can we 3356 * start to complete all requests. 3357 */ 3358 engine->submit_request = nop_complete_submit_request; 3359 } 3360 3361 /* 3362 * Make sure no request can slip through without getting completed by 3363 * either this call here to intel_engine_init_global_seqno, or the one 3364 * in nop_complete_submit_request. 
3365 */ 3366 synchronize_rcu(); 3367 3368 for_each_engine(engine, i915, id) { 3369 unsigned long flags; 3370 3371 /* 3372 * Mark all pending requests as complete so that any concurrent 3373 * (lockless) lookup doesn't try and wait upon the request as we 3374 * reset it. 3375 */ 3376 spin_lock_irqsave(&engine->timeline.lock, flags); 3377 intel_engine_init_global_seqno(engine, 3378 intel_engine_last_submit(engine)); 3379 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3380 3381 i915_gem_reset_finish_engine(engine); 3382 } 3383 3384 out: 3385 GEM_TRACE("end\n"); 3386 3387 wake_up_all(&i915->gpu_error.reset_queue); 3388 } 3389 3390 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3391 { 3392 struct i915_timeline *tl; 3393 3394 lockdep_assert_held(&i915->drm.struct_mutex); 3395 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3396 return true; 3397 3398 GEM_TRACE("start\n"); 3399 3400 /* 3401 * Before unwedging, make sure that all pending operations 3402 * are flushed and errored out - we may have requests waiting upon 3403 * third party fences. We marked all inflight requests as EIO, and 3404 * every execbuf since returned EIO, for consistency we want all 3405 * the currently pending requests to also be marked as EIO, which 3406 * is done inside our nop_submit_request - and so we must wait. 3407 * 3408 * No more can be submitted until we reset the wedged bit. 3409 */ 3410 list_for_each_entry(tl, &i915->gt.timelines, link) { 3411 struct i915_request *rq; 3412 3413 rq = i915_gem_active_peek(&tl->last_request, 3414 &i915->drm.struct_mutex); 3415 if (!rq) 3416 continue; 3417 3418 /* 3419 * We can't use our normal waiter as we want to 3420 * avoid recursively trying to handle the current 3421 * reset. The basic dma_fence_default_wait() installs 3422 * a callback for dma_fence_signal(), which is 3423 * triggered by our nop handler (indirectly, the 3424 * callback enables the signaler thread which is 3425 * woken by the nop_submit_request() advancing the seqno 3426 * and when the seqno passes the fence, the signaler 3427 * then signals the fence waking us up). 3428 */ 3429 if (dma_fence_default_wait(&rq->fence, true, 3430 MAX_SCHEDULE_TIMEOUT) < 0) 3431 return false; 3432 } 3433 i915_retire_requests(i915); 3434 GEM_BUG_ON(i915->gt.active_requests); 3435 3436 if (!intel_gpu_reset(i915, ALL_ENGINES)) 3437 intel_engines_sanitize(i915); 3438 3439 /* 3440 * Undo nop_submit_request. We prevent all new i915 requests from 3441 * being queued (by disallowing execbuf whilst wedged) so having 3442 * waited for all active requests above, we know the system is idle 3443 * and do not have to worry about a thread being inside 3444 * engine->submit_request() as we swap over. So unlike installing 3445 * the nop_submit_request on reset, we can do this from normal 3446 * context and do not require stop_machine(). 3447 */ 3448 intel_engines_reset_default_submission(i915); 3449 i915_gem_contexts_lost(i915); 3450 3451 GEM_TRACE("end\n"); 3452 3453 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3454 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3455 3456 return true; 3457 } 3458 3459 static void 3460 i915_gem_retire_work_handler(struct work_struct *work) 3461 { 3462 struct drm_i915_private *dev_priv = 3463 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3464 struct drm_device *dev = &dev_priv->drm; 3465 3466 /* Come back later if the device is busy... 
*/ 3467 if (mutex_trylock(&dev->struct_mutex)) { 3468 i915_retire_requests(dev_priv); 3469 mutex_unlock(&dev->struct_mutex); 3470 } 3471 3472 /* 3473 * Keep the retire handler running until we are finally idle. 3474 * We do not need to do this test under locking as in the worst-case 3475 * we queue the retire worker once too often. 3476 */ 3477 if (READ_ONCE(dev_priv->gt.awake)) 3478 queue_delayed_work(dev_priv->wq, 3479 &dev_priv->gt.retire_work, 3480 round_jiffies_up_relative(HZ)); 3481 } 3482 3483 static void shrink_caches(struct drm_i915_private *i915) 3484 { 3485 /* 3486 * kmem_cache_shrink() discards empty slabs and reorders partially 3487 * filled slabs to prioritise allocating from the mostly full slabs, 3488 * with the aim of reducing fragmentation. 3489 */ 3490 kmem_cache_shrink(i915->priorities); 3491 kmem_cache_shrink(i915->dependencies); 3492 kmem_cache_shrink(i915->requests); 3493 kmem_cache_shrink(i915->luts); 3494 kmem_cache_shrink(i915->vmas); 3495 kmem_cache_shrink(i915->objects); 3496 } 3497 3498 struct sleep_rcu_work { 3499 union { 3500 struct rcu_head rcu; 3501 struct work_struct work; 3502 }; 3503 struct drm_i915_private *i915; 3504 unsigned int epoch; 3505 }; 3506 3507 static inline bool 3508 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3509 { 3510 /* 3511 * There is a small chance that the epoch wrapped since we started 3512 * sleeping. If we assume that epoch is at least a u32, then it will 3513 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3514 */ 3515 return epoch == READ_ONCE(i915->gt.epoch); 3516 } 3517 3518 static void __sleep_work(struct work_struct *work) 3519 { 3520 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3521 struct drm_i915_private *i915 = s->i915; 3522 unsigned int epoch = s->epoch; 3523 3524 kfree(s); 3525 if (same_epoch(i915, epoch)) 3526 shrink_caches(i915); 3527 } 3528 3529 static void __sleep_rcu(struct rcu_head *rcu) 3530 { 3531 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3532 struct drm_i915_private *i915 = s->i915; 3533 3534 if (same_epoch(i915, s->epoch)) { 3535 INIT_WORK(&s->work, __sleep_work); 3536 queue_work(i915->wq, &s->work); 3537 } else { 3538 kfree(s); 3539 } 3540 } 3541 3542 static inline bool 3543 new_requests_since_last_retire(const struct drm_i915_private *i915) 3544 { 3545 return (READ_ONCE(i915->gt.active_requests) || 3546 work_pending(&i915->gt.idle_work.work)); 3547 } 3548 3549 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3550 { 3551 struct intel_engine_cs *engine; 3552 enum intel_engine_id id; 3553 3554 if (i915_terminally_wedged(&i915->gpu_error)) 3555 return; 3556 3557 GEM_BUG_ON(i915->gt.active_requests); 3558 for_each_engine(engine, i915, id) { 3559 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3560 GEM_BUG_ON(engine->last_retired_context != 3561 to_intel_context(i915->kernel_context, engine)); 3562 } 3563 } 3564 3565 static void 3566 i915_gem_idle_work_handler(struct work_struct *work) 3567 { 3568 struct drm_i915_private *dev_priv = 3569 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3570 unsigned int epoch = I915_EPOCH_INVALID; 3571 bool rearm_hangcheck; 3572 3573 if (!READ_ONCE(dev_priv->gt.awake)) 3574 return; 3575 3576 if (READ_ONCE(dev_priv->gt.active_requests)) 3577 return; 3578 3579 /* 3580 * Flush out the last user context, leaving only the pinned 3581 * kernel context resident. 
When we are idling on the kernel_context, 3582 * no more new requests (with a context switch) are emitted and we 3583 * can finally rest. A consequence is that the idle work handler is 3584 * always called at least twice before idling (and if the system is 3585 * idle that implies a round trip through the retire worker). 3586 */ 3587 mutex_lock(&dev_priv->drm.struct_mutex); 3588 i915_gem_switch_to_kernel_context(dev_priv); 3589 mutex_unlock(&dev_priv->drm.struct_mutex); 3590 3591 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3592 READ_ONCE(dev_priv->gt.active_requests)); 3593 3594 /* 3595 * Wait for last execlists context complete, but bail out in case a 3596 * new request is submitted. As we don't trust the hardware, we 3597 * continue on if the wait times out. This is necessary to allow 3598 * the machine to suspend even if the hardware dies, and we will 3599 * try to recover in resume (after depriving the hardware of power, 3600 * it may be in a better mmod). 3601 */ 3602 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3603 intel_engines_are_idle(dev_priv), 3604 I915_IDLE_ENGINES_TIMEOUT * 1000, 3605 10, 500); 3606 3607 rearm_hangcheck = 3608 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3609 3610 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3611 /* Currently busy, come back later */ 3612 mod_delayed_work(dev_priv->wq, 3613 &dev_priv->gt.idle_work, 3614 msecs_to_jiffies(50)); 3615 goto out_rearm; 3616 } 3617 3618 /* 3619 * New request retired after this work handler started, extend active 3620 * period until next instance of the work. 3621 */ 3622 if (new_requests_since_last_retire(dev_priv)) 3623 goto out_unlock; 3624 3625 epoch = __i915_gem_park(dev_priv); 3626 3627 assert_kernel_context_is_current(dev_priv); 3628 3629 rearm_hangcheck = false; 3630 out_unlock: 3631 mutex_unlock(&dev_priv->drm.struct_mutex); 3632 3633 out_rearm: 3634 if (rearm_hangcheck) { 3635 GEM_BUG_ON(!dev_priv->gt.awake); 3636 i915_queue_hangcheck(dev_priv); 3637 } 3638 3639 /* 3640 * When we are idle, it is an opportune time to reap our caches. 3641 * However, we have many objects that utilise RCU and the ordered 3642 * i915->wq that this work is executing on. To try and flush any 3643 * pending frees now we are idle, we first wait for an RCU grace 3644 * period, and then queue a task (that will run last on the wq) to 3645 * shrink and re-optimize the caches. 3646 */ 3647 if (same_epoch(dev_priv, epoch)) { 3648 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3649 if (s) { 3650 s->i915 = dev_priv; 3651 s->epoch = epoch; 3652 call_rcu(&s->rcu, __sleep_rcu); 3653 } 3654 } 3655 } 3656 3657 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3658 { 3659 struct drm_i915_private *i915 = to_i915(gem->dev); 3660 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3661 struct drm_i915_file_private *fpriv = file->driver_priv; 3662 struct i915_lut_handle *lut, *ln; 3663 3664 mutex_lock(&i915->drm.struct_mutex); 3665 3666 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3667 struct i915_gem_context *ctx = lut->ctx; 3668 struct i915_vma *vma; 3669 3670 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3671 if (ctx->file_priv != fpriv) 3672 continue; 3673 3674 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3675 GEM_BUG_ON(vma->obj != obj); 3676 3677 /* We allow the process to have multiple handles to the same 3678 * vma, in the same fd namespace, by virtue of flink/open. 
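		 *
		 * For example (an illustrative sketch only, using the generic
		 * DRM ioctls and assuming an existing handle in 'handle' and
		 * an open DRM fd in 'fd'):
		 *
		 *	struct drm_gem_flink flink = { .handle = handle };
		 *	struct drm_gem_open open = {};
		 *
		 *	drmIoctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
		 *	open.name = flink.name;
		 *	drmIoctl(fd, DRM_IOCTL_GEM_OPEN, &open);
		 *
		 * after which 'handle' and 'open.handle' both refer to the
		 * same object in the same fd namespace.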
		 */
		GEM_BUG_ON(!vma->open_count);
		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
			i915_vma_close(vma);

		list_del(&lut->obj_link);
		list_del(&lut->ctx_link);

		kmem_cache_free(i915->luts, lut);
		__i915_gem_object_release_unless_active(obj);
	}

	mutex_unlock(&i915->drm.struct_mutex);
}

static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: incomplete, restart syscall
 * -ENOMEM: damn
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static long wait_for_timeline(struct i915_timeline *tl,
			      unsigned int flags, long timeout)
{
	struct i915_request *rq;

	rq = i915_gem_active_get_unlocked(&tl->last_request);
	if (!rq)
		return timeout;

	/*
	 * "Race-to-idle".
	 *
	 * Switching to the kernel context is often used as a synchronous
	 * step prior to idling, e.g. in suspend for flushing all
	 * current operations to memory before sleeping. These we
	 * want to complete as quickly as possible to avoid prolonged
	 * stalls, so allow the gpu to boost to maximum clocks.
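	 *
	 * (A hedged illustration only: the exact call sites differ, but a
	 * suspend-style flush would opt in to that boost along the lines of
	 *
	 *	i915_gem_wait_for_idle(i915,
	 *			       I915_WAIT_INTERRUPTIBLE |
	 *			       I915_WAIT_LOCKED |
	 *			       I915_WAIT_FOR_IDLE_BOOST,
	 *			       MAX_SCHEDULE_TIMEOUT);
	 *
	 * which funnels down to this function with the boost flag set.)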
3794 */ 3795 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3796 gen6_rps_boost(rq, NULL); 3797 3798 timeout = i915_request_wait(rq, flags, timeout); 3799 i915_request_put(rq); 3800 3801 return timeout; 3802 } 3803 3804 static int wait_for_engines(struct drm_i915_private *i915) 3805 { 3806 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3807 dev_err(i915->drm.dev, 3808 "Failed to idle engines, declaring wedged!\n"); 3809 GEM_TRACE_DUMP(); 3810 i915_gem_set_wedged(i915); 3811 return -EIO; 3812 } 3813 3814 return 0; 3815 } 3816 3817 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3818 unsigned int flags, long timeout) 3819 { 3820 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3821 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3822 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3823 3824 /* If the device is asleep, we have no requests outstanding */ 3825 if (!READ_ONCE(i915->gt.awake)) 3826 return 0; 3827 3828 if (flags & I915_WAIT_LOCKED) { 3829 struct i915_timeline *tl; 3830 int err; 3831 3832 lockdep_assert_held(&i915->drm.struct_mutex); 3833 3834 list_for_each_entry(tl, &i915->gt.timelines, link) { 3835 timeout = wait_for_timeline(tl, flags, timeout); 3836 if (timeout < 0) 3837 return timeout; 3838 } 3839 if (GEM_SHOW_DEBUG() && !timeout) { 3840 /* Presume that timeout was non-zero to begin with! */ 3841 dev_warn(&i915->drm.pdev->dev, 3842 "Missed idle-completion interrupt!\n"); 3843 GEM_TRACE_DUMP(); 3844 } 3845 3846 err = wait_for_engines(i915); 3847 if (err) 3848 return err; 3849 3850 i915_retire_requests(i915); 3851 GEM_BUG_ON(i915->gt.active_requests); 3852 } else { 3853 struct intel_engine_cs *engine; 3854 enum intel_engine_id id; 3855 3856 for_each_engine(engine, i915, id) { 3857 struct i915_timeline *tl = &engine->timeline; 3858 3859 timeout = wait_for_timeline(tl, flags, timeout); 3860 if (timeout < 0) 3861 return timeout; 3862 } 3863 } 3864 3865 return 0; 3866 } 3867 3868 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3869 { 3870 /* 3871 * We manually flush the CPU domain so that we can override and 3872 * force the flush for the display, and perform it asyncrhonously. 3873 */ 3874 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3875 if (obj->cache_dirty) 3876 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3877 obj->write_domain = 0; 3878 } 3879 3880 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3881 { 3882 if (!READ_ONCE(obj->pin_global)) 3883 return; 3884 3885 mutex_lock(&obj->base.dev->struct_mutex); 3886 __i915_gem_object_flush_for_display(obj); 3887 mutex_unlock(&obj->base.dev->struct_mutex); 3888 } 3889 3890 /** 3891 * Moves a single object to the WC read, and possibly write domain. 3892 * @obj: object to act on 3893 * @write: ask for write access or read only 3894 * 3895 * This function returns when the move is complete, including waiting on 3896 * flushes to occur. 3897 */ 3898 int 3899 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3900 { 3901 int ret; 3902 3903 lockdep_assert_held(&obj->base.dev->struct_mutex); 3904 3905 ret = i915_gem_object_wait(obj, 3906 I915_WAIT_INTERRUPTIBLE | 3907 I915_WAIT_LOCKED | 3908 (write ? 
I915_WAIT_ALL : 0), 3909 MAX_SCHEDULE_TIMEOUT, 3910 NULL); 3911 if (ret) 3912 return ret; 3913 3914 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3915 return 0; 3916 3917 /* Flush and acquire obj->pages so that we are coherent through 3918 * direct access in memory with previous cached writes through 3919 * shmemfs and that our cache domain tracking remains valid. 3920 * For example, if the obj->filp was moved to swap without us 3921 * being notified and releasing the pages, we would mistakenly 3922 * continue to assume that the obj remained out of the CPU cached 3923 * domain. 3924 */ 3925 ret = i915_gem_object_pin_pages(obj); 3926 if (ret) 3927 return ret; 3928 3929 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3930 3931 /* Serialise direct access to this object with the barriers for 3932 * coherent writes from the GPU, by effectively invalidating the 3933 * WC domain upon first access. 3934 */ 3935 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3936 mb(); 3937 3938 /* It should now be out of any other write domains, and we can update 3939 * the domain values for our changes. 3940 */ 3941 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3942 obj->read_domains |= I915_GEM_DOMAIN_WC; 3943 if (write) { 3944 obj->read_domains = I915_GEM_DOMAIN_WC; 3945 obj->write_domain = I915_GEM_DOMAIN_WC; 3946 obj->mm.dirty = true; 3947 } 3948 3949 i915_gem_object_unpin_pages(obj); 3950 return 0; 3951 } 3952 3953 /** 3954 * Moves a single object to the GTT read, and possibly write domain. 3955 * @obj: object to act on 3956 * @write: ask for write access or read only 3957 * 3958 * This function returns when the move is complete, including waiting on 3959 * flushes to occur. 3960 */ 3961 int 3962 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3963 { 3964 int ret; 3965 3966 lockdep_assert_held(&obj->base.dev->struct_mutex); 3967 3968 ret = i915_gem_object_wait(obj, 3969 I915_WAIT_INTERRUPTIBLE | 3970 I915_WAIT_LOCKED | 3971 (write ? I915_WAIT_ALL : 0), 3972 MAX_SCHEDULE_TIMEOUT, 3973 NULL); 3974 if (ret) 3975 return ret; 3976 3977 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3978 return 0; 3979 3980 /* Flush and acquire obj->pages so that we are coherent through 3981 * direct access in memory with previous cached writes through 3982 * shmemfs and that our cache domain tracking remains valid. 3983 * For example, if the obj->filp was moved to swap without us 3984 * being notified and releasing the pages, we would mistakenly 3985 * continue to assume that the obj remained out of the CPU cached 3986 * domain. 3987 */ 3988 ret = i915_gem_object_pin_pages(obj); 3989 if (ret) 3990 return ret; 3991 3992 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3993 3994 /* Serialise direct access to this object with the barriers for 3995 * coherent writes from the GPU, by effectively invalidating the 3996 * GTT domain upon first access. 3997 */ 3998 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3999 mb(); 4000 4001 /* It should now be out of any other write domains, and we can update 4002 * the domain values for our changes. 4003 */ 4004 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4005 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4006 if (write) { 4007 obj->read_domains = I915_GEM_DOMAIN_GTT; 4008 obj->write_domain = I915_GEM_DOMAIN_GTT; 4009 obj->mm.dirty = true; 4010 } 4011 4012 i915_gem_object_unpin_pages(obj); 4013 return 0; 4014 } 4015 4016 /** 4017 * Changes the cache-level of an object across all VMA. 
4018 * @obj: object to act on 4019 * @cache_level: new cache level to set for the object 4020 * 4021 * After this function returns, the object will be in the new cache-level 4022 * across all GTT and the contents of the backing storage will be coherent, 4023 * with respect to the new cache-level. In order to keep the backing storage 4024 * coherent for all users, we only allow a single cache level to be set 4025 * globally on the object and prevent it from being changed whilst the 4026 * hardware is reading from the object. That is, if the object is currently 4027 * on the scanout it will be set to uncached (or equivalent display 4028 * cache coherency) and all non-MOCS GPU access will also be uncached so 4029 * that all direct access to the scanout remains coherent. 4030 */ 4031 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4032 enum i915_cache_level cache_level) 4033 { 4034 struct i915_vma *vma; 4035 int ret; 4036 4037 lockdep_assert_held(&obj->base.dev->struct_mutex); 4038 4039 if (obj->cache_level == cache_level) 4040 return 0; 4041 4042 /* Inspect the list of currently bound VMA and unbind any that would 4043 * be invalid given the new cache-level. This is principally to 4044 * catch the issue of the CS prefetch crossing page boundaries and 4045 * reading an invalid PTE on older architectures. 4046 */ 4047 restart: 4048 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4049 if (!drm_mm_node_allocated(&vma->node)) 4050 continue; 4051 4052 if (i915_vma_is_pinned(vma)) { 4053 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4054 return -EBUSY; 4055 } 4056 4057 if (!i915_vma_is_closed(vma) && 4058 i915_gem_valid_gtt_space(vma, cache_level)) 4059 continue; 4060 4061 ret = i915_vma_unbind(vma); 4062 if (ret) 4063 return ret; 4064 4065 /* As unbinding may affect other elements in the 4066 * obj->vma_list (due to side-effects from retiring 4067 * an active vma), play safe and restart the iterator. 4068 */ 4069 goto restart; 4070 } 4071 4072 /* We can reuse the existing drm_mm nodes but need to change the 4073 * cache-level on the PTE. We could simply unbind them all and 4074 * rebind with the correct cache-level on next use. However since 4075 * we already have a valid slot, dma mapping, pages etc, we may as well 4076 * rewrite the PTE in the belief that doing so tramples upon less 4077 * state and so involves less work. 4078 */ 4079 if (obj->bind_count) { 4080 /* Before we change the PTE, the GPU must not be accessing it. 4081 * If we wait upon the object, we know that all the bound 4082 * VMA are no longer active. 4083 */ 4084 ret = i915_gem_object_wait(obj, 4085 I915_WAIT_INTERRUPTIBLE | 4086 I915_WAIT_LOCKED | 4087 I915_WAIT_ALL, 4088 MAX_SCHEDULE_TIMEOUT, 4089 NULL); 4090 if (ret) 4091 return ret; 4092 4093 if (!HAS_LLC(to_i915(obj->base.dev)) && 4094 cache_level != I915_CACHE_NONE) { 4095 /* Access to snoopable pages through the GTT is 4096 * incoherent and on some machines causes a hard 4097 * lockup. Relinquish the CPU mmap to force 4098 * userspace to refault in the pages and we can 4099 * then double check if the GTT mapping is still 4100 * valid for that pointer access. 4101 */ 4102 i915_gem_release_mmap(obj); 4103 4104 /* As we no longer need a fence for GTT access, 4105 * we can relinquish it now (and so prevent having 4106 * to steal a fence from someone else on the next 4107 * fence request). Note GPU activity would have 4108 * dropped the fence as all snoopable access is 4109 * supposed to be linear.
4110 */ 4111 for_each_ggtt_vma(vma, obj) { 4112 ret = i915_vma_put_fence(vma); 4113 if (ret) 4114 return ret; 4115 } 4116 } else { 4117 /* We either have incoherent backing store and 4118 * so no GTT access or the architecture is fully 4119 * coherent. In such cases, existing GTT mmaps 4120 * ignore the cache bit in the PTE and we can 4121 * rewrite it without confusing the GPU or having 4122 * to force userspace to fault back in its mmaps. 4123 */ 4124 } 4125 4126 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4127 if (!drm_mm_node_allocated(&vma->node)) 4128 continue; 4129 4130 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4131 if (ret) 4132 return ret; 4133 } 4134 } 4135 4136 list_for_each_entry(vma, &obj->vma_list, obj_link) 4137 vma->node.color = cache_level; 4138 i915_gem_object_set_cache_coherency(obj, cache_level); 4139 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4140 4141 return 0; 4142 } 4143 4144 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4145 struct drm_file *file) 4146 { 4147 struct drm_i915_gem_caching *args = data; 4148 struct drm_i915_gem_object *obj; 4149 int err = 0; 4150 4151 rcu_read_lock(); 4152 obj = i915_gem_object_lookup_rcu(file, args->handle); 4153 if (!obj) { 4154 err = -ENOENT; 4155 goto out; 4156 } 4157 4158 switch (obj->cache_level) { 4159 case I915_CACHE_LLC: 4160 case I915_CACHE_L3_LLC: 4161 args->caching = I915_CACHING_CACHED; 4162 break; 4163 4164 case I915_CACHE_WT: 4165 args->caching = I915_CACHING_DISPLAY; 4166 break; 4167 4168 default: 4169 args->caching = I915_CACHING_NONE; 4170 break; 4171 } 4172 out: 4173 rcu_read_unlock(); 4174 return err; 4175 } 4176 4177 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4178 struct drm_file *file) 4179 { 4180 struct drm_i915_private *i915 = to_i915(dev); 4181 struct drm_i915_gem_caching *args = data; 4182 struct drm_i915_gem_object *obj; 4183 enum i915_cache_level level; 4184 int ret = 0; 4185 4186 switch (args->caching) { 4187 case I915_CACHING_NONE: 4188 level = I915_CACHE_NONE; 4189 break; 4190 case I915_CACHING_CACHED: 4191 /* 4192 * Due to a HW issue on BXT A stepping, GPU stores via a 4193 * snooped mapping may leave stale data in a corresponding CPU 4194 * cacheline, whereas normally such cachelines would get 4195 * invalidated. 4196 */ 4197 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4198 return -ENODEV; 4199 4200 level = I915_CACHE_LLC; 4201 break; 4202 case I915_CACHING_DISPLAY: 4203 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4204 break; 4205 default: 4206 return -EINVAL; 4207 } 4208 4209 obj = i915_gem_object_lookup(file, args->handle); 4210 if (!obj) 4211 return -ENOENT; 4212 4213 /* 4214 * The caching mode of proxy object is handled by its generator, and 4215 * not allowed to be changed by userspace. 4216 */ 4217 if (i915_gem_object_is_proxy(obj)) { 4218 ret = -ENXIO; 4219 goto out; 4220 } 4221 4222 if (obj->cache_level == level) 4223 goto out; 4224 4225 ret = i915_gem_object_wait(obj, 4226 I915_WAIT_INTERRUPTIBLE, 4227 MAX_SCHEDULE_TIMEOUT, 4228 to_rps_client(file)); 4229 if (ret) 4230 goto out; 4231 4232 ret = i915_mutex_lock_interruptible(dev); 4233 if (ret) 4234 goto out; 4235 4236 ret = i915_gem_object_set_cache_level(obj, level); 4237 mutex_unlock(&dev->struct_mutex); 4238 4239 out: 4240 i915_gem_object_put(obj); 4241 return ret; 4242 } 4243 4244 /* 4245 * Prepare buffer for display plane (scanout, cursors, etc). 
Can be called from 4246 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4247 * (for pageflips). We only flush the caches while preparing the buffer for 4248 * display, the callers are responsible for frontbuffer flush. 4249 */ 4250 struct i915_vma * 4251 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4252 u32 alignment, 4253 const struct i915_ggtt_view *view, 4254 unsigned int flags) 4255 { 4256 struct i915_vma *vma; 4257 int ret; 4258 4259 lockdep_assert_held(&obj->base.dev->struct_mutex); 4260 4261 /* Mark the global pin early so that we account for the 4262 * display coherency whilst setting up the cache domains. 4263 */ 4264 obj->pin_global++; 4265 4266 /* The display engine is not coherent with the LLC cache on gen6. As 4267 * a result, we make sure that the pinning that is about to occur is 4268 * done with uncached PTEs. This is lowest common denominator for all 4269 * chipsets. 4270 * 4271 * However for gen6+, we could do better by using the GFDT bit instead 4272 * of uncaching, which would allow us to flush all the LLC-cached data 4273 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4274 */ 4275 ret = i915_gem_object_set_cache_level(obj, 4276 HAS_WT(to_i915(obj->base.dev)) ? 4277 I915_CACHE_WT : I915_CACHE_NONE); 4278 if (ret) { 4279 vma = ERR_PTR(ret); 4280 goto err_unpin_global; 4281 } 4282 4283 /* As the user may map the buffer once pinned in the display plane 4284 * (e.g. libkms for the bootup splash), we have to ensure that we 4285 * always use map_and_fenceable for all scanout buffers. However, 4286 * it may simply be too big to fit into mappable, in which case 4287 * put it anyway and hope that userspace can cope (but always first 4288 * try to preserve the existing ABI). 4289 */ 4290 vma = ERR_PTR(-ENOSPC); 4291 if ((flags & PIN_MAPPABLE) == 0 && 4292 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4293 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4294 flags | 4295 PIN_MAPPABLE | 4296 PIN_NONBLOCK); 4297 if (IS_ERR(vma)) 4298 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4299 if (IS_ERR(vma)) 4300 goto err_unpin_global; 4301 4302 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4303 4304 __i915_gem_object_flush_for_display(obj); 4305 4306 /* It should now be out of any other write domains, and we can update 4307 * the domain values for our changes. 4308 */ 4309 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4310 4311 return vma; 4312 4313 err_unpin_global: 4314 obj->pin_global--; 4315 return vma; 4316 } 4317 4318 void 4319 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4320 { 4321 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4322 4323 if (WARN_ON(vma->obj->pin_global == 0)) 4324 return; 4325 4326 if (--vma->obj->pin_global == 0) 4327 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4328 4329 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4330 i915_gem_object_bump_inactive_ggtt(vma->obj); 4331 4332 i915_vma_unpin(vma); 4333 } 4334 4335 /** 4336 * Moves a single object to the CPU read, and possibly write domain. 4337 * @obj: object to act on 4338 * @write: requesting write or read-only access 4339 * 4340 * This function returns when the move is complete, including waiting on 4341 * flushes to occur. 
4342 */ 4343 int 4344 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4345 { 4346 int ret; 4347 4348 lockdep_assert_held(&obj->base.dev->struct_mutex); 4349 4350 ret = i915_gem_object_wait(obj, 4351 I915_WAIT_INTERRUPTIBLE | 4352 I915_WAIT_LOCKED | 4353 (write ? I915_WAIT_ALL : 0), 4354 MAX_SCHEDULE_TIMEOUT, 4355 NULL); 4356 if (ret) 4357 return ret; 4358 4359 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4360 4361 /* Flush the CPU cache if it's still invalid. */ 4362 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4363 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4364 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4365 } 4366 4367 /* It should now be out of any other write domains, and we can update 4368 * the domain values for our changes. 4369 */ 4370 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4371 4372 /* If we're writing through the CPU, then the GPU read domains will 4373 * need to be invalidated at next use. 4374 */ 4375 if (write) 4376 __start_cpu_write(obj); 4377 4378 return 0; 4379 } 4380 4381 /* Throttle our rendering by waiting until the ring has completed our requests 4382 * emitted over 20 msec ago. 4383 * 4384 * Note that if we were to use the current jiffies each time around the loop, 4385 * we wouldn't escape the function with any frames outstanding if the time to 4386 * render a frame was over 20ms. 4387 * 4388 * This should get us reasonable parallelism between CPU and GPU but also 4389 * relatively low latency when blocking on a particular request to finish. 4390 */ 4391 static int 4392 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4393 { 4394 struct drm_i915_private *dev_priv = to_i915(dev); 4395 struct drm_i915_file_private *file_priv = file->driver_priv; 4396 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4397 struct i915_request *request, *target = NULL; 4398 long ret; 4399 4400 /* ABI: return -EIO if already wedged */ 4401 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4402 return -EIO; 4403 4404 spin_lock(&file_priv->mm.lock); 4405 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4406 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4407 break; 4408 4409 if (target) { 4410 list_del(&target->client_link); 4411 target->file_priv = NULL; 4412 } 4413 4414 target = request; 4415 } 4416 if (target) 4417 i915_request_get(target); 4418 spin_unlock(&file_priv->mm.lock); 4419 4420 if (target == NULL) 4421 return 0; 4422 4423 ret = i915_request_wait(target, 4424 I915_WAIT_INTERRUPTIBLE, 4425 MAX_SCHEDULE_TIMEOUT); 4426 i915_request_put(target); 4427 4428 return ret < 0 ? ret : 0; 4429 } 4430 4431 struct i915_vma * 4432 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4433 const struct i915_ggtt_view *view, 4434 u64 size, 4435 u64 alignment, 4436 u64 flags) 4437 { 4438 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4439 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4440 struct i915_vma *vma; 4441 int ret; 4442 4443 lockdep_assert_held(&obj->base.dev->struct_mutex); 4444 4445 if (flags & PIN_MAPPABLE && 4446 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4447 /* If the required space is larger than the available 4448 * aperture, we will not be able to find a slot for the 4449 * object and unbinding the object now will be in 4450 * vain. Worse, doing so may cause us to ping-pong 4451 * the object in and out of the Global GTT and 4452 * waste a lot of cycles under the mutex.
4453 */ 4454 if (obj->base.size > dev_priv->ggtt.mappable_end) 4455 return ERR_PTR(-E2BIG); 4456 4457 /* If NONBLOCK is set the caller is optimistically 4458 * trying to cache the full object within the mappable 4459 * aperture, and *must* have a fallback in place for 4460 * situations where we cannot bind the object. We 4461 * can be a little more lax here and use the fallback 4462 * more often to avoid costly migrations of ourselves 4463 * and other objects within the aperture. 4464 * 4465 * Half-the-aperture is used as a simple heuristic. 4466 * More interesting would be to do a search for a free 4467 * block prior to making the commitment to unbind. 4468 * That caters for the self-harm case, and with a 4469 * little more heuristics (e.g. NOFAULT, NOEVICT) 4470 * we could try to minimise harm to others. 4471 */ 4472 if (flags & PIN_NONBLOCK && 4473 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4474 return ERR_PTR(-ENOSPC); 4475 } 4476 4477 vma = i915_vma_instance(obj, vm, view); 4478 if (unlikely(IS_ERR(vma))) 4479 return vma; 4480 4481 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4482 if (flags & PIN_NONBLOCK) { 4483 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4484 return ERR_PTR(-ENOSPC); 4485 4486 if (flags & PIN_MAPPABLE && 4487 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4488 return ERR_PTR(-ENOSPC); 4489 } 4490 4491 WARN(i915_vma_is_pinned(vma), 4492 "bo is already pinned in ggtt with incorrect alignment:" 4493 " offset=%08x, req.alignment=%llx," 4494 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4495 i915_ggtt_offset(vma), alignment, 4496 !!(flags & PIN_MAPPABLE), 4497 i915_vma_is_map_and_fenceable(vma)); 4498 ret = i915_vma_unbind(vma); 4499 if (ret) 4500 return ERR_PTR(ret); 4501 } 4502 4503 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4504 if (ret) 4505 return ERR_PTR(ret); 4506 4507 return vma; 4508 } 4509 4510 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4511 { 4512 /* Note that we could alias engines in the execbuf API, but 4513 * that would be very unwise as it prevents userspace from exercising 4514 * fine control over engine selection. Ahem. 4515 * 4516 * This should be something like EXEC_MAX_ENGINE instead of 4517 * I915_NUM_ENGINES. 4518 */ 4519 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4520 return 0x10000 << id; 4521 } 4522 4523 static __always_inline unsigned int __busy_write_id(unsigned int id) 4524 { 4525 /* The uABI guarantees an active writer is also amongst the read 4526 * engines. This would be true if we accessed the activity tracking 4527 * under the lock, but as we perform the lookup of the object and 4528 * its activity locklessly we cannot guarantee that the last_write 4529 * being active implies that we have set the same engine flag from 4530 * last_read - hence we always set both read and write busy for 4531 * last_write. 4532 */ 4533 return id | __busy_read_flag(id); 4534 } 4535 4536 static __always_inline unsigned int 4537 __busy_set_if_active(const struct dma_fence *fence, 4538 unsigned int (*flag)(unsigned int id)) 4539 { 4540 struct i915_request *rq; 4541 4542 /* We have to check the current hw status of the fence as the uABI 4543 * guarantees forward progress. We could rely on the idle worker 4544 * to eventually flush us, but to minimise latency just ask the 4545 * hardware. 4546 * 4547 * Note we only report on the status of native fences.
4548 */ 4549 if (!dma_fence_is_i915(fence)) 4550 return 0; 4551 4552 /* opencode to_request() in order to avoid const warnings */ 4553 rq = container_of(fence, struct i915_request, fence); 4554 if (i915_request_completed(rq)) 4555 return 0; 4556 4557 return flag(rq->engine->uabi_id); 4558 } 4559 4560 static __always_inline unsigned int 4561 busy_check_reader(const struct dma_fence *fence) 4562 { 4563 return __busy_set_if_active(fence, __busy_read_flag); 4564 } 4565 4566 static __always_inline unsigned int 4567 busy_check_writer(const struct dma_fence *fence) 4568 { 4569 if (!fence) 4570 return 0; 4571 4572 return __busy_set_if_active(fence, __busy_write_id); 4573 } 4574 4575 int 4576 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4577 struct drm_file *file) 4578 { 4579 struct drm_i915_gem_busy *args = data; 4580 struct drm_i915_gem_object *obj; 4581 struct reservation_object_list *list; 4582 unsigned int seq; 4583 int err; 4584 4585 err = -ENOENT; 4586 rcu_read_lock(); 4587 obj = i915_gem_object_lookup_rcu(file, args->handle); 4588 if (!obj) 4589 goto out; 4590 4591 /* A discrepancy here is that we do not report the status of 4592 * non-i915 fences, i.e. even though we may report the object as idle, 4593 * a call to set-domain may still stall waiting for foreign rendering. 4594 * This also means that wait-ioctl may report an object as busy, 4595 * where busy-ioctl considers it idle. 4596 * 4597 * We trade the ability to warn of foreign fences to report on which 4598 * i915 engines are active for the object. 4599 * 4600 * Alternatively, we can trade that extra information on read/write 4601 * activity with 4602 * args->busy = 4603 * !reservation_object_test_signaled_rcu(obj->resv, true); 4604 * to report the overall busyness. This is what the wait-ioctl does. 
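* Either way, the lockless snapshot below relies on the reservation object's seqcount: sample it, read the exclusive and shared fences, and retry if the seqcount shows the fences changed while we observed the object as busy.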
4605 * 4606 */ 4607 retry: 4608 seq = raw_read_seqcount(&obj->resv->seq); 4609 4610 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4611 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4612 4613 /* Translate shared fences to READ set of engines */ 4614 list = rcu_dereference(obj->resv->fence); 4615 if (list) { 4616 unsigned int shared_count = list->shared_count, i; 4617 4618 for (i = 0; i < shared_count; ++i) { 4619 struct dma_fence *fence = 4620 rcu_dereference(list->shared[i]); 4621 4622 args->busy |= busy_check_reader(fence); 4623 } 4624 } 4625 4626 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4627 goto retry; 4628 4629 err = 0; 4630 out: 4631 rcu_read_unlock(); 4632 return err; 4633 } 4634 4635 int 4636 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4637 struct drm_file *file_priv) 4638 { 4639 return i915_gem_ring_throttle(dev, file_priv); 4640 } 4641 4642 int 4643 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4644 struct drm_file *file_priv) 4645 { 4646 struct drm_i915_private *dev_priv = to_i915(dev); 4647 struct drm_i915_gem_madvise *args = data; 4648 struct drm_i915_gem_object *obj; 4649 int err; 4650 4651 switch (args->madv) { 4652 case I915_MADV_DONTNEED: 4653 case I915_MADV_WILLNEED: 4654 break; 4655 default: 4656 return -EINVAL; 4657 } 4658 4659 obj = i915_gem_object_lookup(file_priv, args->handle); 4660 if (!obj) 4661 return -ENOENT; 4662 4663 err = mutex_lock_interruptible(&obj->mm.lock); 4664 if (err) 4665 goto out; 4666 4667 if (i915_gem_object_has_pages(obj) && 4668 i915_gem_object_is_tiled(obj) && 4669 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4670 if (obj->mm.madv == I915_MADV_WILLNEED) { 4671 GEM_BUG_ON(!obj->mm.quirked); 4672 __i915_gem_object_unpin_pages(obj); 4673 obj->mm.quirked = false; 4674 } 4675 if (args->madv == I915_MADV_WILLNEED) { 4676 GEM_BUG_ON(obj->mm.quirked); 4677 __i915_gem_object_pin_pages(obj); 4678 obj->mm.quirked = true; 4679 } 4680 } 4681 4682 if (obj->mm.madv != __I915_MADV_PURGED) 4683 obj->mm.madv = args->madv; 4684 4685 /* if the object is no longer attached, discard its backing storage */ 4686 if (obj->mm.madv == I915_MADV_DONTNEED && 4687 !i915_gem_object_has_pages(obj)) 4688 i915_gem_object_truncate(obj); 4689 4690 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4691 mutex_unlock(&obj->mm.lock); 4692 4693 out: 4694 i915_gem_object_put(obj); 4695 return err; 4696 } 4697 4698 static void 4699 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4700 { 4701 struct drm_i915_gem_object *obj = 4702 container_of(active, typeof(*obj), frontbuffer_write); 4703 4704 intel_fb_obj_flush(obj, ORIGIN_CS); 4705 } 4706 4707 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4708 const struct drm_i915_gem_object_ops *ops) 4709 { 4710 mutex_init(&obj->mm.lock); 4711 4712 INIT_LIST_HEAD(&obj->vma_list); 4713 INIT_LIST_HEAD(&obj->lut_list); 4714 INIT_LIST_HEAD(&obj->batch_pool_link); 4715 4716 obj->ops = ops; 4717 4718 reservation_object_init(&obj->__builtin_resv); 4719 obj->resv = &obj->__builtin_resv; 4720 4721 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4722 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4723 4724 obj->mm.madv = I915_MADV_WILLNEED; 4725 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4726 mutex_init(&obj->mm.get_page.lock); 4727 4728 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4729 } 4730 4731 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4732 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4733 I915_GEM_OBJECT_IS_SHRINKABLE, 4734 4735 .get_pages = i915_gem_object_get_pages_gtt, 4736 .put_pages = i915_gem_object_put_pages_gtt, 4737 4738 .pwrite = i915_gem_object_pwrite_gtt, 4739 }; 4740 4741 static int i915_gem_object_create_shmem(struct drm_device *dev, 4742 struct drm_gem_object *obj, 4743 size_t size) 4744 { 4745 struct drm_i915_private *i915 = to_i915(dev); 4746 unsigned long flags = VM_NORESERVE; 4747 struct file *filp; 4748 4749 drm_gem_private_object_init(dev, obj, size); 4750 4751 if (i915->mm.gemfs) 4752 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4753 flags); 4754 else 4755 filp = shmem_file_setup("i915", size, flags); 4756 4757 if (IS_ERR(filp)) 4758 return PTR_ERR(filp); 4759 4760 obj->filp = filp; 4761 4762 return 0; 4763 } 4764 4765 struct drm_i915_gem_object * 4766 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4767 { 4768 struct drm_i915_gem_object *obj; 4769 struct address_space *mapping; 4770 unsigned int cache_level; 4771 gfp_t mask; 4772 int ret; 4773 4774 /* There is a prevalence of the assumption that we fit the object's 4775 * page count inside a 32bit _signed_ variable. Let's document this and 4776 * catch if we ever need to fix it. In the meantime, if you do spot 4777 * such a local variable, please consider fixing! 4778 */ 4779 if (size >> PAGE_SHIFT > INT_MAX) 4780 return ERR_PTR(-E2BIG); 4781 4782 if (overflows_type(size, obj->base.size)) 4783 return ERR_PTR(-E2BIG); 4784 4785 obj = i915_gem_object_alloc(dev_priv); 4786 if (obj == NULL) 4787 return ERR_PTR(-ENOMEM); 4788 4789 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4790 if (ret) 4791 goto fail; 4792 4793 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4794 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4795 /* 965gm cannot relocate objects above 4GiB. */ 4796 mask &= ~__GFP_HIGHMEM; 4797 mask |= __GFP_DMA32; 4798 } 4799 4800 mapping = obj->base.filp->f_mapping; 4801 mapping_set_gfp_mask(mapping, mask); 4802 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4803 4804 i915_gem_object_init(obj, &i915_gem_object_ops); 4805 4806 obj->write_domain = I915_GEM_DOMAIN_CPU; 4807 obj->read_domains = I915_GEM_DOMAIN_CPU; 4808 4809 if (HAS_LLC(dev_priv)) 4810 /* On some devices, we can have the GPU use the LLC (the CPU 4811 * cache) for about a 10% performance improvement 4812 * compared to uncached. Graphics requests other than 4813 * display scanout are coherent with the CPU in 4814 * accessing this cache. This means in this mode we 4815 * don't need to clflush on the CPU side, and on the 4816 * GPU side we only need to flush internal caches to 4817 * get data visible to the CPU. 4818 * 4819 * However, we maintain the display planes as UC, and so 4820 * need to rebind when first used as such. 4821 */ 4822 cache_level = I915_CACHE_LLC; 4823 else 4824 cache_level = I915_CACHE_NONE; 4825 4826 i915_gem_object_set_cache_coherency(obj, cache_level); 4827 4828 trace_i915_gem_object_create(obj); 4829 4830 return obj; 4831 4832 fail: 4833 i915_gem_object_free(obj); 4834 return ERR_PTR(ret); 4835 } 4836 4837 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4838 { 4839 /* If we are the last user of the backing storage (be it shmemfs 4840 * pages or stolen etc), we know that the pages are going to be 4841 * immediately released. In this case, we can then skip copying 4842 * back the contents from the GPU. 
4843 */ 4844 4845 if (obj->mm.madv != I915_MADV_WILLNEED) 4846 return false; 4847 4848 if (obj->base.filp == NULL) 4849 return true; 4850 4851 /* At first glance, this looks racy, but then again so would be 4852 * userspace racing mmap against close. However, the first external 4853 * reference to the filp can only be obtained through the 4854 * i915_gem_mmap_ioctl() which safeguards us against the user 4855 * acquiring such a reference whilst we are in the middle of 4856 * freeing the object. 4857 */ 4858 return atomic_long_read(&obj->base.filp->f_count) == 1; 4859 } 4860 4861 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4862 struct llist_node *freed) 4863 { 4864 struct drm_i915_gem_object *obj, *on; 4865 4866 intel_runtime_pm_get(i915); 4867 llist_for_each_entry_safe(obj, on, freed, freed) { 4868 struct i915_vma *vma, *vn; 4869 4870 trace_i915_gem_object_destroy(obj); 4871 4872 mutex_lock(&i915->drm.struct_mutex); 4873 4874 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4875 list_for_each_entry_safe(vma, vn, 4876 &obj->vma_list, obj_link) { 4877 GEM_BUG_ON(i915_vma_is_active(vma)); 4878 vma->flags &= ~I915_VMA_PIN_MASK; 4879 i915_vma_destroy(vma); 4880 } 4881 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4882 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4883 4884 /* This serializes freeing with the shrinker. Since the free 4885 * is delayed, first by RCU then by the workqueue, we want the 4886 * shrinker to be able to free pages of unreferenced objects, 4887 * or else we may oom whilst there are plenty of deferred 4888 * freed objects. 4889 */ 4890 if (i915_gem_object_has_pages(obj)) { 4891 spin_lock(&i915->mm.obj_lock); 4892 list_del_init(&obj->mm.link); 4893 spin_unlock(&i915->mm.obj_lock); 4894 } 4895 4896 mutex_unlock(&i915->drm.struct_mutex); 4897 4898 GEM_BUG_ON(obj->bind_count); 4899 GEM_BUG_ON(obj->userfault_count); 4900 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4901 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4902 4903 if (obj->ops->release) 4904 obj->ops->release(obj); 4905 4906 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4907 atomic_set(&obj->mm.pages_pin_count, 0); 4908 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4909 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4910 4911 if (obj->base.import_attach) 4912 drm_prime_gem_destroy(&obj->base, NULL); 4913 4914 reservation_object_fini(&obj->__builtin_resv); 4915 drm_gem_object_release(&obj->base); 4916 i915_gem_info_remove_obj(i915, obj->base.size); 4917 4918 kfree(obj->bit_17); 4919 i915_gem_object_free(obj); 4920 4921 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4922 atomic_dec(&i915->mm.free_count); 4923 4924 if (on) 4925 cond_resched(); 4926 } 4927 intel_runtime_pm_put(i915); 4928 } 4929 4930 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4931 { 4932 struct llist_node *freed; 4933 4934 /* Free the oldest, most stale object to keep the free_list short */ 4935 freed = NULL; 4936 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4937 /* Only one consumer of llist_del_first() allowed */ 4938 spin_lock(&i915->mm.free_lock); 4939 freed = llist_del_first(&i915->mm.free_list); 4940 spin_unlock(&i915->mm.free_lock); 4941 } 4942 if (unlikely(freed)) { 4943 freed->next = NULL; 4944 __i915_gem_free_objects(i915, freed); 4945 } 4946 } 4947 4948 static void __i915_gem_free_work(struct work_struct *work) 4949 { 4950 struct drm_i915_private *i915 = 4951 container_of(work, struct drm_i915_private, mm.free_work); 4952 struct llist_node *freed; 4953 4954 /* 4955 * All 
file-owned VMA should have been released by this point through 4956 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4957 * However, the object may also be bound into the global GTT (e.g. 4958 * older GPUs without per-process support, or for direct access through 4959 * the GTT either for the user or for scanout). Those VMA still need to 4960 * be unbound now. 4961 */ 4962 4963 spin_lock(&i915->mm.free_lock); 4964 while ((freed = llist_del_all(&i915->mm.free_list))) { 4965 spin_unlock(&i915->mm.free_lock); 4966 4967 __i915_gem_free_objects(i915, freed); 4968 if (need_resched()) 4969 return; 4970 4971 spin_lock(&i915->mm.free_lock); 4972 } 4973 spin_unlock(&i915->mm.free_lock); 4974 } 4975 4976 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4977 { 4978 struct drm_i915_gem_object *obj = 4979 container_of(head, typeof(*obj), rcu); 4980 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4981 4982 /* 4983 * Since we require blocking on struct_mutex to unbind the freed 4984 * object from the GPU before releasing resources back to the 4985 * system, we cannot do that directly from the RCU callback (which may 4986 * be a softirq context), but must instead defer that work onto a 4987 * kthread. We use the RCU callback rather than move the freed object 4988 * directly onto the work queue so that we can mix between using the 4989 * worker and performing frees directly from subsequent allocations for 4990 * crude but effective memory throttling. 4991 */ 4992 if (llist_add(&obj->freed, &i915->mm.free_list)) 4993 queue_work(i915->wq, &i915->mm.free_work); 4994 } 4995 4996 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4997 { 4998 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4999 5000 if (obj->mm.quirked) 5001 __i915_gem_object_unpin_pages(obj); 5002 5003 if (discard_backing_storage(obj)) 5004 obj->mm.madv = I915_MADV_DONTNEED; 5005 5006 /* 5007 * Before we free the object, make sure any pure RCU-only 5008 * read-side critical sections are complete, e.g. 5009 * i915_gem_busy_ioctl(). For the corresponding synchronized 5010 * lookup see i915_gem_object_lookup_rcu(). 5011 */ 5012 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5013 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5014 } 5015 5016 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5017 { 5018 lockdep_assert_held(&obj->base.dev->struct_mutex); 5019 5020 if (!i915_gem_object_has_active_reference(obj) && 5021 i915_gem_object_is_active(obj)) 5022 i915_gem_object_set_active_reference(obj); 5023 else 5024 i915_gem_object_put(obj); 5025 } 5026 5027 void i915_gem_sanitize(struct drm_i915_private *i915) 5028 { 5029 int err; 5030 5031 GEM_TRACE("\n"); 5032 5033 mutex_lock(&i915->drm.struct_mutex); 5034 5035 intel_runtime_pm_get(i915); 5036 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5037 5038 /* 5039 * As we have just resumed the machine and woken the device up from 5040 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5041 * back to defaults, recovering from whatever wedged state we left it 5042 * in, and so it is worth trying to use the device once more. 5043 */ 5044 if (i915_terminally_wedged(&i915->gpu_error)) 5045 i915_gem_unset_wedged(i915); 5046 5047 /* 5048 * If we inherit context state from the BIOS or earlier occupants 5049 * of the GPU, the GPU may be in an inconsistent state when we 5050 * try to take over. The only way to remove the earlier state 5051 * is by resetting.
However, resetting on earlier gen is tricky as 5052 * it may impact the display and we are uncertain about the stability 5053 * of the reset, so we limit it to gen5 and later for now, although it could perhaps be applied to even earlier gens. 5054 */ 5055 err = -ENODEV; 5056 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5057 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5058 if (!err) 5059 intel_engines_sanitize(i915); 5060 5061 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5062 intel_runtime_pm_put(i915); 5063 5064 i915_gem_contexts_lost(i915); 5065 mutex_unlock(&i915->drm.struct_mutex); 5066 } 5067 5068 int i915_gem_suspend(struct drm_i915_private *i915) 5069 { 5070 int ret; 5071 5072 GEM_TRACE("\n"); 5073 5074 intel_runtime_pm_get(i915); 5075 intel_suspend_gt_powersave(i915); 5076 5077 mutex_lock(&i915->drm.struct_mutex); 5078 5079 /* 5080 * We have to flush all the executing contexts to main memory so 5081 * that they can be saved in the hibernation image. To ensure the last 5082 * context image is coherent, we have to switch away from it. That 5083 * leaves the i915->kernel_context still active when 5084 * we actually suspend, and its image in memory may not match the GPU 5085 * state. Fortunately, the kernel_context is disposable and we do 5086 * not rely on its state. 5087 */ 5088 if (!i915_terminally_wedged(&i915->gpu_error)) { 5089 ret = i915_gem_switch_to_kernel_context(i915); 5090 if (ret) 5091 goto err_unlock; 5092 5093 ret = i915_gem_wait_for_idle(i915, 5094 I915_WAIT_INTERRUPTIBLE | 5095 I915_WAIT_LOCKED | 5096 I915_WAIT_FOR_IDLE_BOOST, 5097 MAX_SCHEDULE_TIMEOUT); 5098 if (ret && ret != -EIO) 5099 goto err_unlock; 5100 5101 assert_kernel_context_is_current(i915); 5102 } 5103 i915_retire_requests(i915); /* ensure we flush after wedging */ 5104 5105 mutex_unlock(&i915->drm.struct_mutex); 5106 5107 intel_uc_suspend(i915); 5108 5109 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5110 cancel_delayed_work_sync(&i915->gt.retire_work); 5111 5112 /* 5113 * As the idle_work re-arms itself if it detects a race, play safe and 5114 * repeat the flush until it is definitely idle. 5115 */ 5116 drain_delayed_work(&i915->gt.idle_work); 5117 5118 /* 5119 * Assert that we successfully flushed all the work and 5120 * reset the GPU back to its idle, low power state. 5121 */ 5122 WARN_ON(i915->gt.awake); 5123 if (WARN_ON(!intel_engines_are_idle(i915))) 5124 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5125 5126 intel_runtime_pm_put(i915); 5127 return 0; 5128 5129 err_unlock: 5130 mutex_unlock(&i915->drm.struct_mutex); 5131 intel_runtime_pm_put(i915); 5132 return ret; 5133 } 5134 5135 void i915_gem_suspend_late(struct drm_i915_private *i915) 5136 { 5137 struct drm_i915_gem_object *obj; 5138 struct list_head *phases[] = { 5139 &i915->mm.unbound_list, 5140 &i915->mm.bound_list, 5141 NULL 5142 }, **phase; 5143 5144 /* 5145 * Neither the BIOS, ourselves nor any other kernel 5146 * expects the system to be in execlists mode on startup, 5147 * so we need to reset the GPU back to legacy mode. And the only 5148 * known way to disable logical contexts is through a GPU reset. 5149 * 5150 * So in order to leave the system in a known default configuration, 5151 * always reset the GPU upon unload and suspend. Afterwards we then 5152 * clean up the GEM state tracking, flushing off the requests and 5153 * leaving the system in a known idle state. 5154 * 5155 * Note that it is of the utmost importance that the GPU is idle and 5156 * all stray writes are flushed *before* we dismantle the backing 5157 * storage for the pinned objects.
5158 * 5159 * However, since we are uncertain that resetting the GPU on older 5160 * machines is a good idea, we don't - just in case it leaves the 5161 * machine in an unusable condition. 5162 */ 5163 5164 mutex_lock(&i915->drm.struct_mutex); 5165 for (phase = phases; *phase; phase++) { 5166 list_for_each_entry(obj, *phase, mm.link) 5167 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5168 } 5169 mutex_unlock(&i915->drm.struct_mutex); 5170 5171 intel_uc_sanitize(i915); 5172 i915_gem_sanitize(i915); 5173 } 5174 5175 void i915_gem_resume(struct drm_i915_private *i915) 5176 { 5177 GEM_TRACE("\n"); 5178 5179 WARN_ON(i915->gt.awake); 5180 5181 mutex_lock(&i915->drm.struct_mutex); 5182 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5183 5184 i915_gem_restore_gtt_mappings(i915); 5185 i915_gem_restore_fences(i915); 5186 5187 /* 5188 * As we didn't flush the kernel context before suspend, we cannot 5189 * guarantee that the context image is complete. So let's just reset 5190 * it and start again. 5191 */ 5192 i915->gt.resume(i915); 5193 5194 if (i915_gem_init_hw(i915)) 5195 goto err_wedged; 5196 5197 intel_uc_resume(i915); 5198 5199 /* Always reload a context for powersaving. */ 5200 if (i915_gem_switch_to_kernel_context(i915)) 5201 goto err_wedged; 5202 5203 out_unlock: 5204 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5205 mutex_unlock(&i915->drm.struct_mutex); 5206 return; 5207 5208 err_wedged: 5209 if (!i915_terminally_wedged(&i915->gpu_error)) { 5210 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5211 i915_gem_set_wedged(i915); 5212 } 5213 goto out_unlock; 5214 } 5215 5216 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5217 { 5218 if (INTEL_GEN(dev_priv) < 5 || 5219 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5220 return; 5221 5222 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5223 DISP_TILE_SURFACE_SWIZZLING); 5224 5225 if (IS_GEN5(dev_priv)) 5226 return; 5227 5228 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5229 if (IS_GEN6(dev_priv)) 5230 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5231 else if (IS_GEN7(dev_priv)) 5232 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5233 else if (IS_GEN8(dev_priv)) 5234 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5235 else 5236 BUG(); 5237 } 5238 5239 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5240 { 5241 I915_WRITE(RING_CTL(base), 0); 5242 I915_WRITE(RING_HEAD(base), 0); 5243 I915_WRITE(RING_TAIL(base), 0); 5244 I915_WRITE(RING_START(base), 0); 5245 } 5246 5247 static void init_unused_rings(struct drm_i915_private *dev_priv) 5248 { 5249 if (IS_I830(dev_priv)) { 5250 init_unused_ring(dev_priv, PRB1_BASE); 5251 init_unused_ring(dev_priv, SRB0_BASE); 5252 init_unused_ring(dev_priv, SRB1_BASE); 5253 init_unused_ring(dev_priv, SRB2_BASE); 5254 init_unused_ring(dev_priv, SRB3_BASE); 5255 } else if (IS_GEN2(dev_priv)) { 5256 init_unused_ring(dev_priv, SRB0_BASE); 5257 init_unused_ring(dev_priv, SRB1_BASE); 5258 } else if (IS_GEN3(dev_priv)) { 5259 init_unused_ring(dev_priv, PRB1_BASE); 5260 init_unused_ring(dev_priv, PRB2_BASE); 5261 } 5262 } 5263 5264 static int __i915_gem_restart_engines(void *data) 5265 { 5266 struct drm_i915_private *i915 = data; 5267 struct intel_engine_cs *engine; 5268 enum intel_engine_id id; 5269 int err; 5270 5271 for_each_engine(engine, i915, id) { 5272 err = engine->init_hw(engine); 5273 if (err) { 5274 DRM_ERROR("Failed to restart %s (%d)\n", 5275 engine->name, err); 
5276 return err; 5277 } 5278 } 5279 5280 return 0; 5281 } 5282 5283 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5284 { 5285 int ret; 5286 5287 dev_priv->gt.last_init_time = ktime_get(); 5288 5289 /* Double layer security blanket, see i915_gem_init() */ 5290 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5291 5292 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5293 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5294 5295 if (IS_HASWELL(dev_priv)) 5296 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5297 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5298 5299 intel_gt_workarounds_apply(dev_priv); 5300 5301 i915_gem_init_swizzling(dev_priv); 5302 5303 /* 5304 * At least 830 can leave some of the unused rings 5305 * "active" (i.e. head != tail) after resume which 5306 * will prevent c3 entry. Make sure all unused rings 5307 * are totally idle. 5308 */ 5309 init_unused_rings(dev_priv); 5310 5311 BUG_ON(!dev_priv->kernel_context); 5312 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5313 ret = -EIO; 5314 goto out; 5315 } 5316 5317 ret = i915_ppgtt_init_hw(dev_priv); 5318 if (ret) { 5319 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5320 goto out; 5321 } 5322 5323 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5324 if (ret) { 5325 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5326 goto out; 5327 } 5328 5329 /* We can't enable contexts until all firmware is loaded */ 5330 ret = intel_uc_init_hw(dev_priv); 5331 if (ret) { 5332 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5333 goto out; 5334 } 5335 5336 intel_mocs_init_l3cc_table(dev_priv); 5337 5338 /* Only when the HW is re-initialised, can we replay the requests */ 5339 ret = __i915_gem_restart_engines(dev_priv); 5340 if (ret) 5341 goto cleanup_uc; 5342 5343 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5344 5345 return 0; 5346 5347 cleanup_uc: 5348 intel_uc_fini_hw(dev_priv); 5349 out: 5350 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5351 5352 return ret; 5353 } 5354 5355 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5356 { 5357 struct i915_gem_context *ctx; 5358 struct intel_engine_cs *engine; 5359 enum intel_engine_id id; 5360 int err; 5361 5362 /* 5363 * As we reset the gpu during very early sanitisation, the current 5364 * register state on the GPU should reflect its default values. 5365 * We load a context onto the hw (with restore-inhibit), then switch 5366 * over to a second context to save that default register state. We 5367 * can then prime every new context with that state so they all start 5368 * from the same default HW values. 5369 */ 5370 5371 ctx = i915_gem_context_create_kernel(i915, 0); 5372 if (IS_ERR(ctx)) 5373 return PTR_ERR(ctx); 5374 5375 for_each_engine(engine, i915, id) { 5376 struct i915_request *rq; 5377 5378 rq = i915_request_alloc(engine, ctx); 5379 if (IS_ERR(rq)) { 5380 err = PTR_ERR(rq); 5381 goto out_ctx; 5382 } 5383 5384 err = 0; 5385 if (engine->init_context) 5386 err = engine->init_context(rq); 5387 5388 i915_request_add(rq); 5389 if (err) 5390 goto err_active; 5391 } 5392 5393 err = i915_gem_switch_to_kernel_context(i915); 5394 if (err) 5395 goto err_active; 5396 5397 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5398 i915_gem_set_wedged(i915); 5399 err = -EIO; /* Caller will declare us wedged */ 5400 goto err_active; 5401 } 5402 5403 assert_kernel_context_is_current(i915); 5404 5405 /* 5406 * Immediately park the GPU so that we enable powersaving and 5407 * treat it as idle.
The next time we issue a request, we will 5408 * unpark and start using the engine->pinned_default_state, otherwise 5409 * it is in limbo and an early reset may fail. 5410 */ 5411 __i915_gem_park(i915); 5412 5413 for_each_engine(engine, i915, id) { 5414 struct i915_vma *state; 5415 void *vaddr; 5416 5417 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 5418 5419 state = to_intel_context(ctx, engine)->state; 5420 if (!state) 5421 continue; 5422 5423 /* 5424 * As we will hold a reference to the logical state, it will 5425 * not be torn down with the context, and importantly the 5426 * object will hold onto its vma (making it possible for a 5427 * stray GTT write to corrupt our defaults). Unmap the vma 5428 * from the GTT to prevent such accidents and reclaim the 5429 * space. 5430 */ 5431 err = i915_vma_unbind(state); 5432 if (err) 5433 goto err_active; 5434 5435 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5436 if (err) 5437 goto err_active; 5438 5439 engine->default_state = i915_gem_object_get(state->obj); 5440 5441 /* Check we can acquire the image of the context state */ 5442 vaddr = i915_gem_object_pin_map(engine->default_state, 5443 I915_MAP_FORCE_WB); 5444 if (IS_ERR(vaddr)) { 5445 err = PTR_ERR(vaddr); 5446 goto err_active; 5447 } 5448 5449 i915_gem_object_unpin_map(engine->default_state); 5450 } 5451 5452 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5453 unsigned int found = intel_engines_has_context_isolation(i915); 5454 5455 /* 5456 * Make sure that classes with multiple engine instances all 5457 * share the same basic configuration. 5458 */ 5459 for_each_engine(engine, i915, id) { 5460 unsigned int bit = BIT(engine->uabi_class); 5461 unsigned int expected = engine->default_state ? bit : 0; 5462 5463 if ((found & bit) != expected) { 5464 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5465 engine->uabi_class, engine->name); 5466 } 5467 } 5468 } 5469 5470 out_ctx: 5471 i915_gem_context_set_closed(ctx); 5472 i915_gem_context_put(ctx); 5473 return err; 5474 5475 err_active: 5476 /* 5477 * If we have to abandon now, we expect the engines to be idle 5478 * and ready to be torn-down. First try to flush any remaining 5479 * request, ensure we are pointing at the kernel context and 5480 * then remove it. 5481 */ 5482 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5483 goto out_ctx; 5484 5485 if (WARN_ON(i915_gem_wait_for_idle(i915, 5486 I915_WAIT_LOCKED, 5487 MAX_SCHEDULE_TIMEOUT))) 5488 goto out_ctx; 5489 5490 i915_gem_contexts_lost(i915); 5491 goto out_ctx; 5492 } 5493 5494 int i915_gem_init(struct drm_i915_private *dev_priv) 5495 { 5496 int ret; 5497 5498 /* We need to fallback to 4K pages if host doesn't support huge gtt. 
*/ 5499 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5500 mkwrite_device_info(dev_priv)->page_sizes = 5501 I915_GTT_PAGE_SIZE_4K; 5502 5503 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5504 5505 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5506 dev_priv->gt.resume = intel_lr_context_resume; 5507 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5508 } else { 5509 dev_priv->gt.resume = intel_legacy_submission_resume; 5510 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5511 } 5512 5513 ret = i915_gem_init_userptr(dev_priv); 5514 if (ret) 5515 return ret; 5516 5517 ret = intel_uc_init_misc(dev_priv); 5518 if (ret) 5519 return ret; 5520 5521 ret = intel_wopcm_init(&dev_priv->wopcm); 5522 if (ret) 5523 goto err_uc_misc; 5524 5525 /* This is just a security blanket to placate dragons. 5526 * On some systems, we very sporadically observe that the first TLBs 5527 * used by the CS may be stale, despite us poking the TLB reset. If 5528 * we hold the forcewake during initialisation these problems 5529 * just magically go away. 5530 */ 5531 mutex_lock(&dev_priv->drm.struct_mutex); 5532 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5533 5534 ret = i915_gem_init_ggtt(dev_priv); 5535 if (ret) { 5536 GEM_BUG_ON(ret == -EIO); 5537 goto err_unlock; 5538 } 5539 5540 ret = i915_gem_contexts_init(dev_priv); 5541 if (ret) { 5542 GEM_BUG_ON(ret == -EIO); 5543 goto err_ggtt; 5544 } 5545 5546 ret = intel_engines_init(dev_priv); 5547 if (ret) { 5548 GEM_BUG_ON(ret == -EIO); 5549 goto err_context; 5550 } 5551 5552 intel_init_gt_powersave(dev_priv); 5553 5554 ret = intel_uc_init(dev_priv); 5555 if (ret) 5556 goto err_pm; 5557 5558 ret = i915_gem_init_hw(dev_priv); 5559 if (ret) 5560 goto err_uc_init; 5561 5562 /* 5563 * Despite its name intel_init_clock_gating applies display 5564 * clock gating workarounds, GT mmio workarounds and the occasional 5565 * GT power context workaround. Worse, sometimes it includes a context 5566 * register workaround which we need to apply before we record the 5567 * default HW state for all contexts. 5568 * 5569 * FIXME: break up the workarounds and apply them at the right time! 5570 */ 5571 intel_init_clock_gating(dev_priv); 5572 5573 ret = __intel_engines_record_defaults(dev_priv); 5574 if (ret) 5575 goto err_init_hw; 5576 5577 if (i915_inject_load_failure()) { 5578 ret = -ENODEV; 5579 goto err_init_hw; 5580 } 5581 5582 if (i915_inject_load_failure()) { 5583 ret = -EIO; 5584 goto err_init_hw; 5585 } 5586 5587 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5588 mutex_unlock(&dev_priv->drm.struct_mutex); 5589 5590 return 0; 5591 5592 /* 5593 * Unwinding is complicated by the fact that we want to handle -EIO to mean 5594 * disable GPU submission but keep KMS alive. We want to mark the 5595 * HW as irreversibly wedged, but keep enough state around that the 5596 * driver doesn't explode during runtime.
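* Hence the error labels below skip the full teardown when ret == -EIO and instead fall back to a minimal recovery that keeps KMS working.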
5597 */ 5598 err_init_hw: 5599 mutex_unlock(&dev_priv->drm.struct_mutex); 5600 5601 WARN_ON(i915_gem_suspend(dev_priv)); 5602 i915_gem_suspend_late(dev_priv); 5603 5604 i915_gem_drain_workqueue(dev_priv); 5605 5606 mutex_lock(&dev_priv->drm.struct_mutex); 5607 intel_uc_fini_hw(dev_priv); 5608 err_uc_init: 5609 intel_uc_fini(dev_priv); 5610 err_pm: 5611 if (ret != -EIO) { 5612 intel_cleanup_gt_powersave(dev_priv); 5613 i915_gem_cleanup_engines(dev_priv); 5614 } 5615 err_context: 5616 if (ret != -EIO) 5617 i915_gem_contexts_fini(dev_priv); 5618 err_ggtt: 5619 err_unlock: 5620 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5621 mutex_unlock(&dev_priv->drm.struct_mutex); 5622 5623 err_uc_misc: 5624 intel_uc_fini_misc(dev_priv); 5625 5626 if (ret != -EIO) 5627 i915_gem_cleanup_userptr(dev_priv); 5628 5629 if (ret == -EIO) { 5630 mutex_lock(&dev_priv->drm.struct_mutex); 5631 5632 /* 5633 * Allow engine initialisation to fail by marking the GPU as 5634 * wedged. But we only want to do this when the GPU is angry; 5635 * for any other failure, such as an allocation failure, bail. 5636 */ 5637 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5638 i915_load_error(dev_priv, 5639 "Failed to initialize GPU, declaring it wedged!\n"); 5640 i915_gem_set_wedged(dev_priv); 5641 } 5642 5643 /* Minimal basic recovery for KMS */ 5644 ret = i915_ggtt_enable_hw(dev_priv); 5645 i915_gem_restore_gtt_mappings(dev_priv); 5646 i915_gem_restore_fences(dev_priv); 5647 intel_init_clock_gating(dev_priv); 5648 5649 mutex_unlock(&dev_priv->drm.struct_mutex); 5650 } 5651 5652 i915_gem_drain_freed_objects(dev_priv); 5653 return ret; 5654 } 5655 5656 void i915_gem_fini(struct drm_i915_private *dev_priv) 5657 { 5658 i915_gem_suspend_late(dev_priv); 5659 intel_disable_gt_powersave(dev_priv); 5660 5661 /* Flush any outstanding unpin_work.
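* This also drains any deferred frees still queued on the driver workqueue, so nothing is left in flight before the engines and contexts are torn down below.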
*/ 5662 i915_gem_drain_workqueue(dev_priv); 5663 5664 mutex_lock(&dev_priv->drm.struct_mutex); 5665 intel_uc_fini_hw(dev_priv); 5666 intel_uc_fini(dev_priv); 5667 i915_gem_cleanup_engines(dev_priv); 5668 i915_gem_contexts_fini(dev_priv); 5669 mutex_unlock(&dev_priv->drm.struct_mutex); 5670 5671 intel_cleanup_gt_powersave(dev_priv); 5672 5673 intel_uc_fini_misc(dev_priv); 5674 i915_gem_cleanup_userptr(dev_priv); 5675 5676 i915_gem_drain_freed_objects(dev_priv); 5677 5678 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5679 } 5680 5681 void i915_gem_init_mmio(struct drm_i915_private *i915) 5682 { 5683 i915_gem_sanitize(i915); 5684 } 5685 5686 void 5687 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5688 { 5689 struct intel_engine_cs *engine; 5690 enum intel_engine_id id; 5691 5692 for_each_engine(engine, dev_priv, id) 5693 dev_priv->gt.cleanup_engine(engine); 5694 } 5695 5696 void 5697 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5698 { 5699 int i; 5700 5701 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5702 !IS_CHERRYVIEW(dev_priv)) 5703 dev_priv->num_fence_regs = 32; 5704 else if (INTEL_GEN(dev_priv) >= 4 || 5705 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5706 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5707 dev_priv->num_fence_regs = 16; 5708 else 5709 dev_priv->num_fence_regs = 8; 5710 5711 if (intel_vgpu_active(dev_priv)) 5712 dev_priv->num_fence_regs = 5713 I915_READ(vgtif_reg(avail_rs.fence_num)); 5714 5715 /* Initialize fence registers to zero */ 5716 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5717 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5718 5719 fence->i915 = dev_priv; 5720 fence->id = i; 5721 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5722 } 5723 i915_gem_restore_fences(dev_priv); 5724 5725 i915_gem_detect_bit_6_swizzle(dev_priv); 5726 } 5727 5728 static void i915_gem_init__mm(struct drm_i915_private *i915) 5729 { 5730 spin_lock_init(&i915->mm.object_stat_lock); 5731 spin_lock_init(&i915->mm.obj_lock); 5732 spin_lock_init(&i915->mm.free_lock); 5733 5734 init_llist_head(&i915->mm.free_list); 5735 5736 INIT_LIST_HEAD(&i915->mm.unbound_list); 5737 INIT_LIST_HEAD(&i915->mm.bound_list); 5738 INIT_LIST_HEAD(&i915->mm.fence_list); 5739 INIT_LIST_HEAD(&i915->mm.userfault_list); 5740 5741 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5742 } 5743 5744 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5745 { 5746 int err = -ENOMEM; 5747 5748 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5749 if (!dev_priv->objects) 5750 goto err_out; 5751 5752 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5753 if (!dev_priv->vmas) 5754 goto err_objects; 5755 5756 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5757 if (!dev_priv->luts) 5758 goto err_vmas; 5759 5760 dev_priv->requests = KMEM_CACHE(i915_request, 5761 SLAB_HWCACHE_ALIGN | 5762 SLAB_RECLAIM_ACCOUNT | 5763 SLAB_TYPESAFE_BY_RCU); 5764 if (!dev_priv->requests) 5765 goto err_luts; 5766 5767 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5768 SLAB_HWCACHE_ALIGN | 5769 SLAB_RECLAIM_ACCOUNT); 5770 if (!dev_priv->dependencies) 5771 goto err_requests; 5772 5773 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5774 if (!dev_priv->priorities) 5775 goto err_dependencies; 5776 5777 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5778 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5779 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5780 5781 i915_gem_init__mm(dev_priv); 5782 5783 
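/* Delayed workers: gt.retire_work periodically retires completed requests while the GPU is busy, and gt.idle_work parks the GPU after a period of inactivity. */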
void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
        int i;

        if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                dev_priv->num_fence_regs = 32;
        else if (INTEL_GEN(dev_priv) >= 4 ||
                 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
                 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
                dev_priv->num_fence_regs = 16;
        else
                dev_priv->num_fence_regs = 8;

        if (intel_vgpu_active(dev_priv))
                dev_priv->num_fence_regs =
                                I915_READ(vgtif_reg(avail_rs.fence_num));

        /* Initialize fence registers to zero */
        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

                fence->i915 = dev_priv;
                fence->id = i;
                list_add_tail(&fence->link, &dev_priv->mm.fence_list);
        }
        i915_gem_restore_fences(dev_priv);

        i915_gem_detect_bit_6_swizzle(dev_priv);
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
        spin_lock_init(&i915->mm.object_stat_lock);
        spin_lock_init(&i915->mm.obj_lock);
        spin_lock_init(&i915->mm.free_lock);

        init_llist_head(&i915->mm.free_list);

        INIT_LIST_HEAD(&i915->mm.unbound_list);
        INIT_LIST_HEAD(&i915->mm.bound_list);
        INIT_LIST_HEAD(&i915->mm.fence_list);
        INIT_LIST_HEAD(&i915->mm.userfault_list);

        INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
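/*
 * i915_gem_init_early() only sets up software state: the slab caches,
 * global lists, workers and waitqueues that GEM object, vma and request
 * allocation rely on. No hardware access happens here; everything created
 * below is released again by i915_gem_cleanup_early().
 */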
int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
        int err = -ENOMEM;

        dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
        if (!dev_priv->objects)
                goto err_out;

        dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
        if (!dev_priv->vmas)
                goto err_objects;

        dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
        if (!dev_priv->luts)
                goto err_vmas;

        dev_priv->requests = KMEM_CACHE(i915_request,
                                        SLAB_HWCACHE_ALIGN |
                                        SLAB_RECLAIM_ACCOUNT |
                                        SLAB_TYPESAFE_BY_RCU);
        if (!dev_priv->requests)
                goto err_luts;

        dev_priv->dependencies = KMEM_CACHE(i915_dependency,
                                            SLAB_HWCACHE_ALIGN |
                                            SLAB_RECLAIM_ACCOUNT);
        if (!dev_priv->dependencies)
                goto err_requests;

        dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
        if (!dev_priv->priorities)
                goto err_dependencies;

        INIT_LIST_HEAD(&dev_priv->gt.timelines);
        INIT_LIST_HEAD(&dev_priv->gt.active_rings);
        INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

        i915_gem_init__mm(dev_priv);

        INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
                          i915_gem_retire_work_handler);
        INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
                          i915_gem_idle_work_handler);
        init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
        init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

        spin_lock_init(&dev_priv->fb_tracking.lock);

        err = i915_gemfs_init(dev_priv);
        if (err)
                DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

        return 0;

err_dependencies:
        kmem_cache_destroy(dev_priv->dependencies);
err_requests:
        kmem_cache_destroy(dev_priv->requests);
err_luts:
        kmem_cache_destroy(dev_priv->luts);
err_vmas:
        kmem_cache_destroy(dev_priv->vmas);
err_objects:
        kmem_cache_destroy(dev_priv->objects);
err_out:
        return err;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
        i915_gem_drain_freed_objects(dev_priv);
        GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
        GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
        WARN_ON(dev_priv->mm.object_count);
        WARN_ON(!list_empty(&dev_priv->gt.timelines));

        kmem_cache_destroy(dev_priv->priorities);
        kmem_cache_destroy(dev_priv->dependencies);
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->luts);
        kmem_cache_destroy(dev_priv->vmas);
        kmem_cache_destroy(dev_priv->objects);

        /* And ensure that our TYPESAFE_BY_RCU slabs are truly destroyed */
        rcu_barrier();

        i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
        /* Discard all purgeable objects, let userspace recover those as
         * required after resuming.
         */
        i915_gem_shrink_all(dev_priv);

        return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;
        struct list_head *phases[] = {
                &i915->mm.unbound_list,
                &i915->mm.bound_list,
                NULL
        }, **phase;

        /*
         * Called just before we write the hibernation image.
         *
         * We need to update the domain tracking to reflect that the CPU
         * will be accessing all the pages to create and restore from the
         * hibernation, and so upon restoration those pages will be in the
         * CPU domain.
         *
         * To make sure the hibernation image contains the latest state,
         * we update that state just before writing out the image.
         *
         * To try and reduce the hibernation image, we manually shrink
         * the objects as well, see i915_gem_freeze().
         */

        i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
        i915_gem_drain_freed_objects(i915);

        mutex_lock(&i915->drm.struct_mutex);
        for (phase = phases; *phase; phase++) {
                list_for_each_entry(obj, *phase, mm.link)
                        WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
        }
        mutex_unlock(&i915->drm.struct_mutex);

        return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct i915_request *request;

        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
        list_for_each_entry(request, &file_priv->mm.request_list, client_link)
                request->file_priv = NULL;
        spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
        struct drm_i915_file_private *file_priv;
        int ret;

        DRM_DEBUG("\n");

        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
        if (!file_priv)
                return -ENOMEM;

        file->driver_priv = file_priv;
        file_priv->dev_priv = i915;
        file_priv->file = file;

        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);

        file_priv->bsd_engine = -1;
        file_priv->hang_timestamp = jiffies;

        ret = i915_gem_context_open(i915, file);
        if (ret)
                kfree(file_priv);

        return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
                       struct drm_i915_gem_object *new,
                       unsigned frontbuffer_bits)
{
        /* Control of individual bits within the mask is guarded by
         * the owning plane->mutex, i.e. we can never see concurrent
         * manipulation of individual bits. But since the bitfield as a whole
         * is updated using RMW, we need to use atomics in order to update
         * the bits.
         */
        BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
                     BITS_PER_TYPE(atomic_t));

        if (old) {
                WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
                atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
        }

        if (new) {
                WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
                atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
        }
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
                                 const void *data, size_t size)
{
        struct drm_i915_gem_object *obj;
        struct file *file;
        size_t offset;
        int err;

        obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
        if (IS_ERR(obj))
                return obj;

        GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

        file = obj->base.filp;
        offset = 0;
        do {
                unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
                struct page *page;
                void *pgdata, *vaddr;

                err = pagecache_write_begin(file, file->f_mapping,
                                            offset, len, 0,
                                            &page, &pgdata);
                if (err < 0)
                        goto fail;

                vaddr = kmap(page);
                memcpy(vaddr, data, len);
                kunmap(page);

                err = pagecache_write_end(file, file->f_mapping,
                                          offset, len, len,
                                          page, pgdata);
                if (err < 0)
                        goto fail;

                size -= len;
                data += len;
                offset += len;
        } while (size);

        return obj;

fail:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}
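/*
 * Worked example for the radixtree cache maintained by
 * i915_gem_object_get_sg() below: an sg entry covering four pages starting
 * at page index 8 is recorded as
 *
 *   radix[8]     -> the scatterlist entry itself
 *   radix[9..11] -> exceptional entries encoding the base index 8
 *
 * so a later lookup of page 10 finds the exceptional entry, decodes the
 * base index 8, re-looks up radix[8] for the real sg entry and reports
 * *offset = 2.
 */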
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
                       unsigned int n,
                       unsigned int *offset)
{
        struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
        struct scatterlist *sg;
        unsigned int idx, count;

        might_sleep();
        GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

        /* As we iterate forward through the sg, we record each entry in a
         * radixtree for quick repeated (backwards) lookups. If we have seen
         * this index previously, we will have an entry for it.
         *
         * Initial lookup is O(N), but this is amortized to O(1) for
         * sequential page access (where each new request is consecutive
         * to the previous one). Repeated lookups are O(lg(obj->base.size)),
         * i.e. O(1) with a large constant!
         */
        if (n < READ_ONCE(iter->sg_idx))
                goto lookup;

        mutex_lock(&iter->lock);

        /* We prefer to reuse the last sg so that repeated lookups of this
         * (or the subsequent) sg are fast - comparing against the last
         * sg is faster than going through the radixtree.
         */

        sg = iter->sg_pos;
        idx = iter->sg_idx;
        count = __sg_page_count(sg);

        while (idx + count <= n) {
                unsigned long exception, i;
                int ret;

                /* If we cannot allocate and insert this entry, or the
                 * individual pages from this range, cancel updating the
                 * sg_idx so that on this lookup we are forced to linearly
                 * scan onwards, but on future lookups we will try the
                 * insertion again (in which case we need to be careful of
                 * the error return reporting that we have already inserted
                 * this index).
                 */
                ret = radix_tree_insert(&iter->radix, idx, sg);
                if (ret && ret != -EEXIST)
                        goto scan;

                exception =
                        RADIX_TREE_EXCEPTIONAL_ENTRY |
                        idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
                for (i = 1; i < count; i++) {
                        ret = radix_tree_insert(&iter->radix, idx + i,
                                                (void *)exception);
                        if (ret && ret != -EEXIST)
                                goto scan;
                }

                idx += count;
                sg = ____sg_next(sg);
                count = __sg_page_count(sg);
        }

scan:
        iter->sg_pos = sg;
        iter->sg_idx = idx;

        mutex_unlock(&iter->lock);

        if (unlikely(n < idx)) /* insertion completed by another thread */
                goto lookup;

        /* In case we failed to insert the entry into the radixtree, we need
         * to look beyond the current sg.
         */
        while (idx + count <= n) {
                idx += count;
                sg = ____sg_next(sg);
                count = __sg_page_count(sg);
        }

        *offset = n - idx;
        return sg;

lookup:
        rcu_read_lock();

        sg = radix_tree_lookup(&iter->radix, n);
        GEM_BUG_ON(!sg);

        /* If this index is in the middle of a multi-page sg entry,
         * the radixtree will contain an exceptional entry that points
         * to the start of that range. We will return the pointer to
         * the base page and the offset of this page within the
         * sg entry's range.
         */
        *offset = 0;
        if (unlikely(radix_tree_exception(sg))) {
                unsigned long base =
                        (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

                sg = radix_tree_lookup(&iter->radix, base);
                GEM_BUG_ON(!sg);

                *offset = n - base;
        }

        rcu_read_unlock();

        return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
        struct scatterlist *sg;
        unsigned int offset;

        GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

        sg = i915_gem_object_get_sg(obj, n, &offset);
        return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
                               unsigned int n)
{
        struct page *page;

        page = i915_gem_object_get_page(obj, n);
        if (!obj->mm.dirty)
                set_page_dirty(page);

        return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
                                unsigned long n)
{
        struct scatterlist *sg;
        unsigned int offset;

        sg = i915_gem_object_get_sg(obj, n, &offset);
        return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
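/*
 * i915_gem_object_attach_phys() switches a default shmem-backed object
 * over to the i915_gem_phys_ops backend: the object must be unbound,
 * WILLNEED, unquirked and unmapped; its ops are then swapped and the pages
 * re-acquired through the new backend and left permanently pinned. On
 * failure the original ops and page set are restored.
 */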
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
        struct sg_table *pages;
        int err;

        if (align > obj->base.size)
                return -EINVAL;

        if (obj->ops == &i915_gem_phys_ops)
                return 0;

        if (obj->ops != &i915_gem_object_ops)
                return -EINVAL;

        err = i915_gem_object_unbind(obj);
        if (err)
                return err;

        mutex_lock(&obj->mm.lock);

        if (obj->mm.madv != I915_MADV_WILLNEED) {
                err = -EFAULT;
                goto err_unlock;
        }

        if (obj->mm.quirked) {
                err = -EFAULT;
                goto err_unlock;
        }

        if (obj->mm.mapping) {
                err = -EBUSY;
                goto err_unlock;
        }

        pages = __i915_gem_object_unset_pages(obj);

        obj->ops = &i915_gem_phys_ops;

        err = ____i915_gem_object_get_pages(obj);
        if (err)
                goto err_xfer;

        /* Perma-pin (until release) the physical set of pages */
        __i915_gem_object_pin_pages(obj);

        if (!IS_ERR_OR_NULL(pages))
                i915_gem_object_ops.put_pages(obj, pages);
        mutex_unlock(&obj->mm.lock);
        return 0;

err_xfer:
        obj->ops = &i915_gem_object_ops;
        if (!IS_ERR_OR_NULL(pages)) {
                unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

                __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
        }
err_unlock:
        mutex_unlock(&obj->mm.lock);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif