/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

static u32 __i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);
	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));

	if (!i915->gt.awake)
		return I915_EPOCH_INVALID;

	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);

	/*
	 * Be paranoid and flush a concurrent interrupt to make sure
	 * we don't reactivate any irq tasklets after parking.
	 *
	 * FIXME: Note that even though we have waited for execlists to be idle,
	 * there may still be an in-flight interrupt even though the CSB
	 * is now empty. synchronize_irq() makes sure that a residual interrupt
	 * is completed before we continue, but it doesn't prevent the HW from
	 * raising a spurious interrupt later. To complete the shield we should
	 * coordinate disabling the CS irq with flushing the interrupts.
	 */
	synchronize_irq(i915->drm.irq);

	intel_engines_park(i915);
	i915_timelines_park(i915);

	i915_pmu_gt_parked(i915);
	i915_vma_parked(i915);

	i915->gt.awake = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_idle(i915);

	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);

	intel_runtime_pm_put(i915);

	return i915->gt.epoch;
}

void i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);

	if (!i915->gt.awake)
		return;

	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
}

void i915_gem_unpark(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(!i915->gt.active_requests);

	if (i915->gt.awake)
		return;

	intel_runtime_pm_get_noresume(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has negative impact on the performance of the chip with
	 * huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
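	 *
	 * (The matching intel_display_power_put(POWER_DOMAIN_GT_IRQ) is
	 * issued from __i915_gem_park() once the GT has idled again.)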
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->vm.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
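	 * (drm_pci_alloc() below is passed roundup_pow_of_two(obj->base.size)
	 * for both the size and the alignment, so the contiguous DMA buffer
	 * is naturally aligned to its own size.)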
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}

static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct i915_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_request_completed(rq))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client && !i915_request_started(rq)) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
	}

	timeout = i915_request_wait(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must also be signaled. If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0)
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);

	dma_fence_put(excl);

	/*
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
{
	struct i915_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;

	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(rq, attr);
	rcu_read_unlock();
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}

static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], attr);
	} else {
		__fence_set_priority(fence, attr);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      const struct i915_sched_attr *attr)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], attr);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, attr);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       args->size, &args->handle);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
		return;

	i915_gem_chipset_flush(dev_priv);

	intel_runtime_pm_get(dev_priv);
	spin_lock_irq(&dev_priv->uncore.lock);

	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

	spin_unlock_irq(&dev_priv->uncore.lock);
	intel_runtime_pm_put(dev_priv);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
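 *
 * Typical usage (sketch; mirrors i915_gem_shmem_pread(), error handling
 * trimmed):
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy out of the object's pages, clflushing first when
 *	    needs_clflush & CLFLUSH_BEFORE is set ...
 *	i915_gem_obj_finish_shmem_access(obj);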
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all.
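		 * (Rounding to 128 bytes below covers both 64-byte
		 * cachelines of a swizzled pair, i.e. offset and
		 * offset ^ 64, without having to recompute the swizzle.)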
		 */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86.
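	 * Try first with a per-CPU atomic (non-sleeping) write-combining
	 * mapping; if that copy faults on the user address, retry below
	 * with a regular mapping and a plain copy_to_user() that may sleep.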
	 */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
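 *
 * The copy is first attempted via the object's shmem pages; if that path
 * returns -EFAULT or -ENODEV (e.g. no struct pages to read from), the
 * read falls back to a GTT read through the mappable aperture.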
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
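		 *
		 * If the wakeref cannot be taken without resuming, bail with
		 * -EFAULT so that the caller falls back to the shmem pwrite
		 * path instead.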
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
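 *
 * Userspace reaches this roughly as follows (sketch using libdrm's
 * drmIoctl(); names illustrative, error handling elided):
 *
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle = handle,
 *		.offset = dst_offset,
 *		.size = len,
 *		.data_ptr = (uintptr_t)src,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);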
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_is_active(vma))
			continue;

		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915 = to_i915(obj->base.dev);
	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU.
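	 * That is I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_GTT or
	 * I915_GEM_DOMAIN_WC; requests naming a GPU domain are rejected
	 * with -EINVAL below.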
	 */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
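 *
 * Objects that do not fit into the mappable aperture as a whole are bound
 * through a partial view (see compute_partial_view()), faulting in
 * MIN_CHUNK_PAGES-sized chunks around the faulting page.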
2002 */ 2003 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2004 { 2005 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2006 struct vm_area_struct *area = vmf->vma; 2007 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2008 struct drm_device *dev = obj->base.dev; 2009 struct drm_i915_private *dev_priv = to_i915(dev); 2010 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2011 bool write = area->vm_flags & VM_WRITE; 2012 struct i915_vma *vma; 2013 pgoff_t page_offset; 2014 int ret; 2015 2016 /* Sanity check that we allow writing into this object */ 2017 if (i915_gem_object_is_readonly(obj) && write) 2018 return VM_FAULT_SIGBUS; 2019 2020 /* We don't use vmf->pgoff since that has the fake offset */ 2021 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2022 2023 trace_i915_gem_object_fault(obj, page_offset, true, write); 2024 2025 /* Try to flush the object off the GPU first without holding the lock. 2026 * Upon acquiring the lock, we will perform our sanity checks and then 2027 * repeat the flush holding the lock in the normal manner to catch cases 2028 * where we are gazumped. 2029 */ 2030 ret = i915_gem_object_wait(obj, 2031 I915_WAIT_INTERRUPTIBLE, 2032 MAX_SCHEDULE_TIMEOUT, 2033 NULL); 2034 if (ret) 2035 goto err; 2036 2037 ret = i915_gem_object_pin_pages(obj); 2038 if (ret) 2039 goto err; 2040 2041 intel_runtime_pm_get(dev_priv); 2042 2043 ret = i915_mutex_lock_interruptible(dev); 2044 if (ret) 2045 goto err_rpm; 2046 2047 /* Access to snoopable pages through the GTT is incoherent. */ 2048 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2049 ret = -EFAULT; 2050 goto err_unlock; 2051 } 2052 2053 2054 /* Now pin it into the GTT as needed */ 2055 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2056 PIN_MAPPABLE | 2057 PIN_NONBLOCK | 2058 PIN_NONFAULT); 2059 if (IS_ERR(vma)) { 2060 /* Use a partial view if it is bigger than available space */ 2061 struct i915_ggtt_view view = 2062 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2063 unsigned int flags; 2064 2065 flags = PIN_MAPPABLE; 2066 if (view.type == I915_GGTT_VIEW_NORMAL) 2067 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2068 2069 /* 2070 * Userspace is now writing through an untracked VMA, abandon 2071 * all hope that the hardware is able to track future writes. 
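		 * Mark future frontbuffer writes as originating from the CPU so
		 * that they are flushed explicitly instead of relying on GTT
		 * write tracking.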
2072 */ 2073 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2074 2075 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2076 if (IS_ERR(vma) && !view.type) { 2077 flags = PIN_MAPPABLE; 2078 view.type = I915_GGTT_VIEW_PARTIAL; 2079 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2080 } 2081 } 2082 if (IS_ERR(vma)) { 2083 ret = PTR_ERR(vma); 2084 goto err_unlock; 2085 } 2086 2087 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2088 if (ret) 2089 goto err_unpin; 2090 2091 ret = i915_vma_pin_fence(vma); 2092 if (ret) 2093 goto err_unpin; 2094 2095 /* Finally, remap it using the new GTT offset */ 2096 ret = remap_io_mapping(area, 2097 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2098 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2099 min_t(u64, vma->size, area->vm_end - area->vm_start), 2100 &ggtt->iomap); 2101 if (ret) 2102 goto err_fence; 2103 2104 /* Mark as being mmapped into userspace for later revocation */ 2105 assert_rpm_wakelock_held(dev_priv); 2106 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2107 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2108 GEM_BUG_ON(!obj->userfault_count); 2109 2110 i915_vma_set_ggtt_write(vma); 2111 2112 err_fence: 2113 i915_vma_unpin_fence(vma); 2114 err_unpin: 2115 __i915_vma_unpin(vma); 2116 err_unlock: 2117 mutex_unlock(&dev->struct_mutex); 2118 err_rpm: 2119 intel_runtime_pm_put(dev_priv); 2120 i915_gem_object_unpin_pages(obj); 2121 err: 2122 switch (ret) { 2123 case -EIO: 2124 /* 2125 * We eat errors when the gpu is terminally wedged to avoid 2126 * userspace unduly crashing (gl has no provisions for mmaps to 2127 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2128 * and so needs to be reported. 2129 */ 2130 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2131 return VM_FAULT_SIGBUS; 2132 /* else: fall through */ 2133 case -EAGAIN: 2134 /* 2135 * EAGAIN means the gpu is hung and we'll wait for the error 2136 * handler to reset everything when re-faulting in 2137 * i915_mutex_lock_interruptible. 2138 */ 2139 case 0: 2140 case -ERESTARTSYS: 2141 case -EINTR: 2142 case -EBUSY: 2143 /* 2144 * EBUSY is ok: this just means that another thread 2145 * already did the job. 2146 */ 2147 return VM_FAULT_NOPAGE; 2148 case -ENOMEM: 2149 return VM_FAULT_OOM; 2150 case -ENOSPC: 2151 case -EFAULT: 2152 return VM_FAULT_SIGBUS; 2153 default: 2154 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2155 return VM_FAULT_SIGBUS; 2156 } 2157 } 2158 2159 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2160 { 2161 struct i915_vma *vma; 2162 2163 GEM_BUG_ON(!obj->userfault_count); 2164 2165 obj->userfault_count = 0; 2166 list_del(&obj->userfault_link); 2167 drm_vma_node_unmap(&obj->base.vma_node, 2168 obj->base.dev->anon_inode->i_mapping); 2169 2170 for_each_ggtt_vma(vma, obj) 2171 i915_vma_unset_userfault(vma); 2172 } 2173 2174 /** 2175 * i915_gem_release_mmap - remove physical page mappings 2176 * @obj: obj in question 2177 * 2178 * Preserve the reservation of the mmapping with the DRM core code, but 2179 * relinquish ownership of the pages back to the system. 2180 * 2181 * It is vital that we remove the page mapping if we have mapped a tiled 2182 * object through the GTT and then lose the fence register due to 2183 * resource pressure. Similarly if the object has been moved out of the 2184 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2185 * mapping will then trigger a page fault on the next user access, allowing 2186 * fixup by i915_gem_fault(). 2187 */ 2188 void 2189 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2190 { 2191 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2192 2193 /* Serialisation between user GTT access and our code depends upon 2194 * revoking the CPU's PTE whilst the mutex is held. The next user 2195 * pagefault then has to wait until we release the mutex. 2196 * 2197 * Note that RPM complicates somewhat by adding an additional 2198 * requirement that operations to the GGTT be made holding the RPM 2199 * wakeref. 2200 */ 2201 lockdep_assert_held(&i915->drm.struct_mutex); 2202 intel_runtime_pm_get(i915); 2203 2204 if (!obj->userfault_count) 2205 goto out; 2206 2207 __i915_gem_object_release_mmap(obj); 2208 2209 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2210 * memory transactions from userspace before we return. The TLB 2211 * flushing implied above by changing the PTE above *should* be 2212 * sufficient, an extra barrier here just provides us with a bit 2213 * of paranoid documentation about our requirement to serialise 2214 * memory writes before touching registers / GSM. 2215 */ 2216 wmb(); 2217 2218 out: 2219 intel_runtime_pm_put(i915); 2220 } 2221 2222 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2223 { 2224 struct drm_i915_gem_object *obj, *on; 2225 int i; 2226 2227 /* 2228 * Only called during RPM suspend. All users of the userfault_list 2229 * must be holding an RPM wakeref to ensure that this can not 2230 * run concurrently with themselves (and use the struct_mutex for 2231 * protection between themselves). 2232 */ 2233 2234 list_for_each_entry_safe(obj, on, 2235 &dev_priv->mm.userfault_list, userfault_link) 2236 __i915_gem_object_release_mmap(obj); 2237 2238 /* The fence will be lost when the device powers down. If any were 2239 * in use by hardware (i.e. they are pinned), we should not be powering 2240 * down! All other fences will be reacquired by the user upon waking. 2241 */ 2242 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2243 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2244 2245 /* Ideally we want to assert that the fence register is not 2246 * live at this point (i.e. that no piece of code will be 2247 * trying to write through fence + GTT, as that both violates 2248 * our tracking of activity and associated locking/barriers, 2249 * but also is illegal given that the hw is powered down). 2250 * 2251 * Previously we used reg->pin_count as a "liveness" indicator. 2252 * That is not sufficient, and we need a more fine-grained 2253 * tool if we want to have a sanity check here. 
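		 * For now we settle for marking the fence as dirty so that it
		 * is rewritten before it is next used.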
2254 */ 2255 2256 if (!reg->vma) 2257 continue; 2258 2259 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2260 reg->dirty = true; 2261 } 2262 } 2263 2264 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2265 { 2266 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2267 int err; 2268 2269 err = drm_gem_create_mmap_offset(&obj->base); 2270 if (likely(!err)) 2271 return 0; 2272 2273 /* Attempt to reap some mmap space from dead objects */ 2274 do { 2275 err = i915_gem_wait_for_idle(dev_priv, 2276 I915_WAIT_INTERRUPTIBLE, 2277 MAX_SCHEDULE_TIMEOUT); 2278 if (err) 2279 break; 2280 2281 i915_gem_drain_freed_objects(dev_priv); 2282 err = drm_gem_create_mmap_offset(&obj->base); 2283 if (!err) 2284 break; 2285 2286 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2287 2288 return err; 2289 } 2290 2291 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2292 { 2293 drm_gem_free_mmap_offset(&obj->base); 2294 } 2295 2296 int 2297 i915_gem_mmap_gtt(struct drm_file *file, 2298 struct drm_device *dev, 2299 uint32_t handle, 2300 uint64_t *offset) 2301 { 2302 struct drm_i915_gem_object *obj; 2303 int ret; 2304 2305 obj = i915_gem_object_lookup(file, handle); 2306 if (!obj) 2307 return -ENOENT; 2308 2309 ret = i915_gem_object_create_mmap_offset(obj); 2310 if (ret == 0) 2311 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2312 2313 i915_gem_object_put(obj); 2314 return ret; 2315 } 2316 2317 /** 2318 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2319 * @dev: DRM device 2320 * @data: GTT mapping ioctl data 2321 * @file: GEM object info 2322 * 2323 * Simply returns the fake offset to userspace so it can mmap it. 2324 * The mmap call will end up in drm_gem_mmap(), which will set things 2325 * up so we can get faults in the handler above. 2326 * 2327 * The fault handler will take care of binding the object into the GTT 2328 * (since it may have been evicted to make room for something), allocating 2329 * a fence register, and mapping the appropriate aperture address into 2330 * userspace. 2331 */ 2332 int 2333 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2334 struct drm_file *file) 2335 { 2336 struct drm_i915_gem_mmap_gtt *args = data; 2337 2338 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2339 } 2340 2341 /* Immediately discard the backing storage */ 2342 static void 2343 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2344 { 2345 i915_gem_object_free_mmap_offset(obj); 2346 2347 if (obj->base.filp == NULL) 2348 return; 2349 2350 /* Our goal here is to return as much of the memory as 2351 * is possible back to the system as we are called from OOM. 2352 * To do this we must instruct the shmfs to drop all of its 2353 * backing pages, *now*. 
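	 * Once truncated, the object is marked as purged and its backing
	 * store cannot be reattached; any later attempt to use its pages
	 * will fail.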
2354 */ 2355 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2356 obj->mm.madv = __I915_MADV_PURGED; 2357 obj->mm.pages = ERR_PTR(-EFAULT); 2358 } 2359 2360 /* Try to discard unwanted pages */ 2361 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2362 { 2363 struct address_space *mapping; 2364 2365 lockdep_assert_held(&obj->mm.lock); 2366 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2367 2368 switch (obj->mm.madv) { 2369 case I915_MADV_DONTNEED: 2370 i915_gem_object_truncate(obj); 2371 case __I915_MADV_PURGED: 2372 return; 2373 } 2374 2375 if (obj->base.filp == NULL) 2376 return; 2377 2378 mapping = obj->base.filp->f_mapping, 2379 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2380 } 2381 2382 /* 2383 * Move pages to appropriate lru and release the pagevec, decrementing the 2384 * ref count of those pages. 2385 */ 2386 static void check_release_pagevec(struct pagevec *pvec) 2387 { 2388 check_move_unevictable_pages(pvec); 2389 __pagevec_release(pvec); 2390 cond_resched(); 2391 } 2392 2393 static void 2394 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2395 struct sg_table *pages) 2396 { 2397 struct sgt_iter sgt_iter; 2398 struct pagevec pvec; 2399 struct page *page; 2400 2401 __i915_gem_object_release_shmem(obj, pages, true); 2402 2403 i915_gem_gtt_finish_pages(obj, pages); 2404 2405 if (i915_gem_object_needs_bit17_swizzle(obj)) 2406 i915_gem_object_save_bit_17_swizzle(obj, pages); 2407 2408 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2409 2410 pagevec_init(&pvec); 2411 for_each_sgt_page(page, sgt_iter, pages) { 2412 if (obj->mm.dirty) 2413 set_page_dirty(page); 2414 2415 if (obj->mm.madv == I915_MADV_WILLNEED) 2416 mark_page_accessed(page); 2417 2418 if (!pagevec_add(&pvec, page)) 2419 check_release_pagevec(&pvec); 2420 } 2421 if (pagevec_count(&pvec)) 2422 check_release_pagevec(&pvec); 2423 obj->mm.dirty = false; 2424 2425 sg_free_table(pages); 2426 kfree(pages); 2427 } 2428 2429 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2430 { 2431 struct radix_tree_iter iter; 2432 void __rcu **slot; 2433 2434 rcu_read_lock(); 2435 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2436 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2437 rcu_read_unlock(); 2438 } 2439 2440 static struct sg_table * 2441 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2442 { 2443 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2444 struct sg_table *pages; 2445 2446 pages = fetch_and_zero(&obj->mm.pages); 2447 if (!pages) 2448 return NULL; 2449 2450 spin_lock(&i915->mm.obj_lock); 2451 list_del(&obj->mm.link); 2452 spin_unlock(&i915->mm.obj_lock); 2453 2454 if (obj->mm.mapping) { 2455 void *ptr; 2456 2457 ptr = page_mask_bits(obj->mm.mapping); 2458 if (is_vmalloc_addr(ptr)) 2459 vunmap(ptr); 2460 else 2461 kunmap(kmap_to_page(ptr)); 2462 2463 obj->mm.mapping = NULL; 2464 } 2465 2466 __i915_gem_object_reset_page_iter(obj); 2467 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2468 2469 return pages; 2470 } 2471 2472 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2473 enum i915_mm_subclass subclass) 2474 { 2475 struct sg_table *pages; 2476 2477 if (i915_gem_object_has_pinned_pages(obj)) 2478 return; 2479 2480 GEM_BUG_ON(obj->bind_count); 2481 if (!i915_gem_object_has_pages(obj)) 2482 return; 2483 2484 /* May be called by shrinker from within get_pages() (on another bo) */ 2485 mutex_lock_nested(&obj->mm.lock, subclass); 2486 if 
(unlikely(atomic_read(&obj->mm.pages_pin_count))) 2487 goto unlock; 2488 2489 /* 2490 * ->put_pages might need to allocate memory for the bit17 swizzle 2491 * array, hence protect them from being reaped by removing them from gtt 2492 * lists early. 2493 */ 2494 pages = __i915_gem_object_unset_pages(obj); 2495 if (!IS_ERR(pages)) 2496 obj->ops->put_pages(obj, pages); 2497 2498 unlock: 2499 mutex_unlock(&obj->mm.lock); 2500 } 2501 2502 bool i915_sg_trim(struct sg_table *orig_st) 2503 { 2504 struct sg_table new_st; 2505 struct scatterlist *sg, *new_sg; 2506 unsigned int i; 2507 2508 if (orig_st->nents == orig_st->orig_nents) 2509 return false; 2510 2511 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2512 return false; 2513 2514 new_sg = new_st.sgl; 2515 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2516 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2517 sg_dma_address(new_sg) = sg_dma_address(sg); 2518 sg_dma_len(new_sg) = sg_dma_len(sg); 2519 2520 new_sg = sg_next(new_sg); 2521 } 2522 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2523 2524 sg_free_table(orig_st); 2525 2526 *orig_st = new_st; 2527 return true; 2528 } 2529 2530 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2531 { 2532 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2533 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2534 unsigned long i; 2535 struct address_space *mapping; 2536 struct sg_table *st; 2537 struct scatterlist *sg; 2538 struct sgt_iter sgt_iter; 2539 struct page *page; 2540 unsigned long last_pfn = 0; /* suppress gcc warning */ 2541 unsigned int max_segment = i915_sg_segment_size(); 2542 unsigned int sg_page_sizes; 2543 struct pagevec pvec; 2544 gfp_t noreclaim; 2545 int ret; 2546 2547 /* 2548 * Assert that the object is not currently in any GPU domain. As it 2549 * wasn't in the GTT, there shouldn't be any way it could have been in 2550 * a GPU cache 2551 */ 2552 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2553 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2554 2555 /* 2556 * If there's no chance of allocating enough pages for the whole 2557 * object, bail early. 2558 */ 2559 if (page_count > totalram_pages()) 2560 return -ENOMEM; 2561 2562 st = kmalloc(sizeof(*st), GFP_KERNEL); 2563 if (st == NULL) 2564 return -ENOMEM; 2565 2566 rebuild_st: 2567 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2568 kfree(st); 2569 return -ENOMEM; 2570 } 2571 2572 /* 2573 * Get the list of pages out of our struct file. They'll be pinned 2574 * at this point until we release them. 
2575 * 2576 * Fail silently without starting the shrinker 2577 */ 2578 mapping = obj->base.filp->f_mapping; 2579 mapping_set_unevictable(mapping); 2580 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2581 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2582 2583 sg = st->sgl; 2584 st->nents = 0; 2585 sg_page_sizes = 0; 2586 for (i = 0; i < page_count; i++) { 2587 const unsigned int shrink[] = { 2588 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2589 0, 2590 }, *s = shrink; 2591 gfp_t gfp = noreclaim; 2592 2593 do { 2594 cond_resched(); 2595 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2596 if (likely(!IS_ERR(page))) 2597 break; 2598 2599 if (!*s) { 2600 ret = PTR_ERR(page); 2601 goto err_sg; 2602 } 2603 2604 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2605 2606 /* 2607 * We've tried hard to allocate the memory by reaping 2608 * our own buffer, now let the real VM do its job and 2609 * go down in flames if truly OOM. 2610 * 2611 * However, since graphics tend to be disposable, 2612 * defer the oom here by reporting the ENOMEM back 2613 * to userspace. 2614 */ 2615 if (!*s) { 2616 /* reclaim and warn, but no oom */ 2617 gfp = mapping_gfp_mask(mapping); 2618 2619 /* 2620 * Our bo are always dirty and so we require 2621 * kswapd to reclaim our pages (direct reclaim 2622 * does not effectively begin pageout of our 2623 * buffers on its own). However, direct reclaim 2624 * only waits for kswapd when under allocation 2625 * congestion. So as a result __GFP_RECLAIM is 2626 * unreliable and fails to actually reclaim our 2627 * dirty pages -- unless you try over and over 2628 * again with !__GFP_NORETRY. However, we still 2629 * want to fail this allocation rather than 2630 * trigger the out-of-memory killer and for 2631 * this we want __GFP_RETRY_MAYFAIL. 2632 */ 2633 gfp |= __GFP_RETRY_MAYFAIL; 2634 } 2635 } while (1); 2636 2637 if (!i || 2638 sg->length >= max_segment || 2639 page_to_pfn(page) != last_pfn + 1) { 2640 if (i) { 2641 sg_page_sizes |= sg->length; 2642 sg = sg_next(sg); 2643 } 2644 st->nents++; 2645 sg_set_page(sg, page, PAGE_SIZE, 0); 2646 } else { 2647 sg->length += PAGE_SIZE; 2648 } 2649 last_pfn = page_to_pfn(page); 2650 2651 /* Check that the i965g/gm workaround works. */ 2652 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2653 } 2654 if (sg) { /* loop terminated early; short sg table */ 2655 sg_page_sizes |= sg->length; 2656 sg_mark_end(sg); 2657 } 2658 2659 /* Trim unused sg entries to avoid wasting memory. */ 2660 i915_sg_trim(st); 2661 2662 ret = i915_gem_gtt_prepare_pages(obj, st); 2663 if (ret) { 2664 /* 2665 * DMA remapping failed? One possible cause is that 2666 * it could not reserve enough large entries, asking 2667 * for PAGE_SIZE chunks instead may be helpful. 
2668 */ 2669 if (max_segment > PAGE_SIZE) { 2670 for_each_sgt_page(page, sgt_iter, st) 2671 put_page(page); 2672 sg_free_table(st); 2673 2674 max_segment = PAGE_SIZE; 2675 goto rebuild_st; 2676 } else { 2677 dev_warn(&dev_priv->drm.pdev->dev, 2678 "Failed to DMA remap %lu pages\n", 2679 page_count); 2680 goto err_pages; 2681 } 2682 } 2683 2684 if (i915_gem_object_needs_bit17_swizzle(obj)) 2685 i915_gem_object_do_bit_17_swizzle(obj, st); 2686 2687 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2688 2689 return 0; 2690 2691 err_sg: 2692 sg_mark_end(sg); 2693 err_pages: 2694 mapping_clear_unevictable(mapping); 2695 pagevec_init(&pvec); 2696 for_each_sgt_page(page, sgt_iter, st) { 2697 if (!pagevec_add(&pvec, page)) 2698 check_release_pagevec(&pvec); 2699 } 2700 if (pagevec_count(&pvec)) 2701 check_release_pagevec(&pvec); 2702 sg_free_table(st); 2703 kfree(st); 2704 2705 /* 2706 * shmemfs first checks if there is enough memory to allocate the page 2707 * and reports ENOSPC should there be insufficient, along with the usual 2708 * ENOMEM for a genuine allocation failure. 2709 * 2710 * We use ENOSPC in our driver to mean that we have run out of aperture 2711 * space and so want to translate the error from shmemfs back to our 2712 * usual understanding of ENOMEM. 2713 */ 2714 if (ret == -ENOSPC) 2715 ret = -ENOMEM; 2716 2717 return ret; 2718 } 2719 2720 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2721 struct sg_table *pages, 2722 unsigned int sg_page_sizes) 2723 { 2724 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2725 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2726 int i; 2727 2728 lockdep_assert_held(&obj->mm.lock); 2729 2730 obj->mm.get_page.sg_pos = pages->sgl; 2731 obj->mm.get_page.sg_idx = 0; 2732 2733 obj->mm.pages = pages; 2734 2735 if (i915_gem_object_is_tiled(obj) && 2736 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2737 GEM_BUG_ON(obj->mm.quirked); 2738 __i915_gem_object_pin_pages(obj); 2739 obj->mm.quirked = true; 2740 } 2741 2742 GEM_BUG_ON(!sg_page_sizes); 2743 obj->mm.page_sizes.phys = sg_page_sizes; 2744 2745 /* 2746 * Calculate the supported page-sizes which fit into the given 2747 * sg_page_sizes. This will give us the page-sizes which we may be able 2748 * to use opportunistically when later inserting into the GTT. For 2749 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2750 * 64K or 4K pages, although in practice this will depend on a number of 2751 * other factors. 2752 */ 2753 obj->mm.page_sizes.sg = 0; 2754 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2755 if (obj->mm.page_sizes.phys & ~0u << i) 2756 obj->mm.page_sizes.sg |= BIT(i); 2757 } 2758 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2759 2760 spin_lock(&i915->mm.obj_lock); 2761 list_add(&obj->mm.link, &i915->mm.unbound_list); 2762 spin_unlock(&i915->mm.obj_lock); 2763 } 2764 2765 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2766 { 2767 int err; 2768 2769 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2770 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2771 return -EFAULT; 2772 } 2773 2774 err = obj->ops->get_pages(obj); 2775 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2776 2777 return err; 2778 } 2779 2780 /* Ensure that the associated pages are gathered from the backing storage 2781 * and pinned into our object. 
i915_gem_object_pin_pages() may be called 2782 * multiple times before they are released by a single call to 2783 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2784 * either as a result of memory pressure (reaping pages under the shrinker) 2785 * or as the object is itself released. 2786 */ 2787 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2788 { 2789 int err; 2790 2791 err = mutex_lock_interruptible(&obj->mm.lock); 2792 if (err) 2793 return err; 2794 2795 if (unlikely(!i915_gem_object_has_pages(obj))) { 2796 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2797 2798 err = ____i915_gem_object_get_pages(obj); 2799 if (err) 2800 goto unlock; 2801 2802 smp_mb__before_atomic(); 2803 } 2804 atomic_inc(&obj->mm.pages_pin_count); 2805 2806 unlock: 2807 mutex_unlock(&obj->mm.lock); 2808 return err; 2809 } 2810 2811 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2812 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2813 enum i915_map_type type) 2814 { 2815 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2816 struct sg_table *sgt = obj->mm.pages; 2817 struct sgt_iter sgt_iter; 2818 struct page *page; 2819 struct page *stack_pages[32]; 2820 struct page **pages = stack_pages; 2821 unsigned long i = 0; 2822 pgprot_t pgprot; 2823 void *addr; 2824 2825 /* A single page can always be kmapped */ 2826 if (n_pages == 1 && type == I915_MAP_WB) 2827 return kmap(sg_page(sgt->sgl)); 2828 2829 if (n_pages > ARRAY_SIZE(stack_pages)) { 2830 /* Too big for stack -- allocate temporary array instead */ 2831 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2832 if (!pages) 2833 return NULL; 2834 } 2835 2836 for_each_sgt_page(page, sgt_iter, sgt) 2837 pages[i++] = page; 2838 2839 /* Check that we have the expected number of pages */ 2840 GEM_BUG_ON(i != n_pages); 2841 2842 switch (type) { 2843 default: 2844 MISSING_CASE(type); 2845 /* fallthrough to use PAGE_KERNEL anyway */ 2846 case I915_MAP_WB: 2847 pgprot = PAGE_KERNEL; 2848 break; 2849 case I915_MAP_WC: 2850 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2851 break; 2852 } 2853 addr = vmap(pages, n_pages, 0, pgprot); 2854 2855 if (pages != stack_pages) 2856 kvfree(pages); 2857 2858 return addr; 2859 } 2860 2861 /* get, pin, and map the pages of the object into kernel space */ 2862 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2863 enum i915_map_type type) 2864 { 2865 enum i915_map_type has_type; 2866 bool pinned; 2867 void *ptr; 2868 int ret; 2869 2870 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2871 return ERR_PTR(-ENXIO); 2872 2873 ret = mutex_lock_interruptible(&obj->mm.lock); 2874 if (ret) 2875 return ERR_PTR(ret); 2876 2877 pinned = !(type & I915_MAP_OVERRIDE); 2878 type &= ~I915_MAP_OVERRIDE; 2879 2880 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2881 if (unlikely(!i915_gem_object_has_pages(obj))) { 2882 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2883 2884 ret = ____i915_gem_object_get_pages(obj); 2885 if (ret) 2886 goto err_unlock; 2887 2888 smp_mb__before_atomic(); 2889 } 2890 atomic_inc(&obj->mm.pages_pin_count); 2891 pinned = false; 2892 } 2893 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2894 2895 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2896 if (ptr && has_type != type) { 2897 if (pinned) { 2898 ret = -EBUSY; 2899 goto err_unpin; 2900 } 2901 2902 if (is_vmalloc_addr(ptr)) 2903 vunmap(ptr); 2904 else 2905 kunmap(kmap_to_page(ptr)); 2906 2907 ptr = obj->mm.mapping = NULL; 2908 } 2909 2910 if (!ptr) { 
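		/* No cached mapping of the requested type; create and cache one */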
2911 ptr = i915_gem_object_map(obj, type); 2912 if (!ptr) { 2913 ret = -ENOMEM; 2914 goto err_unpin; 2915 } 2916 2917 obj->mm.mapping = page_pack_bits(ptr, type); 2918 } 2919 2920 out_unlock: 2921 mutex_unlock(&obj->mm.lock); 2922 return ptr; 2923 2924 err_unpin: 2925 atomic_dec(&obj->mm.pages_pin_count); 2926 err_unlock: 2927 ptr = ERR_PTR(ret); 2928 goto out_unlock; 2929 } 2930 2931 static int 2932 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2933 const struct drm_i915_gem_pwrite *arg) 2934 { 2935 struct address_space *mapping = obj->base.filp->f_mapping; 2936 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2937 u64 remain, offset; 2938 unsigned int pg; 2939 2940 /* Before we instantiate/pin the backing store for our use, we 2941 * can prepopulate the shmemfs filp efficiently using a write into 2942 * the pagecache. We avoid the penalty of instantiating all the 2943 * pages, important if the user is just writing to a few and never 2944 * uses the object on the GPU, and using a direct write into shmemfs 2945 * allows it to avoid the cost of retrieving a page (either swapin 2946 * or clearing-before-use) before it is overwritten. 2947 */ 2948 if (i915_gem_object_has_pages(obj)) 2949 return -ENODEV; 2950 2951 if (obj->mm.madv != I915_MADV_WILLNEED) 2952 return -EFAULT; 2953 2954 /* Before the pages are instantiated the object is treated as being 2955 * in the CPU domain. The pages will be clflushed as required before 2956 * use, and we can freely write into the pages directly. If userspace 2957 * races pwrite with any other operation; corruption will ensue - 2958 * that is userspace's prerogative! 2959 */ 2960 2961 remain = arg->size; 2962 offset = arg->offset; 2963 pg = offset_in_page(offset); 2964 2965 do { 2966 unsigned int len, unwritten; 2967 struct page *page; 2968 void *data, *vaddr; 2969 int err; 2970 2971 len = PAGE_SIZE - pg; 2972 if (len > remain) 2973 len = remain; 2974 2975 err = pagecache_write_begin(obj->base.filp, mapping, 2976 offset, len, 0, 2977 &page, &data); 2978 if (err < 0) 2979 return err; 2980 2981 vaddr = kmap(page); 2982 unwritten = copy_from_user(vaddr + pg, user_data, len); 2983 kunmap(page); 2984 2985 err = pagecache_write_end(obj->base.filp, mapping, 2986 offset, len, len - unwritten, 2987 page, data); 2988 if (err < 0) 2989 return err; 2990 2991 if (unwritten) 2992 return -EFAULT; 2993 2994 remain -= len; 2995 user_data += len; 2996 offset += len; 2997 pg = 0; 2998 } while (remain); 2999 3000 return 0; 3001 } 3002 3003 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 3004 const struct i915_gem_context *ctx) 3005 { 3006 unsigned int score; 3007 unsigned long prev_hang; 3008 3009 if (i915_gem_context_is_banned(ctx)) 3010 score = I915_CLIENT_SCORE_CONTEXT_BAN; 3011 else 3012 score = 0; 3013 3014 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 3015 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 3016 score += I915_CLIENT_SCORE_HANG_FAST; 3017 3018 if (score) { 3019 atomic_add(score, &file_priv->ban_score); 3020 3021 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 3022 ctx->name, score, 3023 atomic_read(&file_priv->ban_score)); 3024 } 3025 } 3026 3027 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3028 { 3029 unsigned int score; 3030 bool banned, bannable; 3031 3032 atomic_inc(&ctx->guilty_count); 3033 3034 bannable = i915_gem_context_is_bannable(ctx); 3035 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3036 banned = score >= 
CONTEXT_SCORE_BAN_THRESHOLD; 3037 3038 /* Cool contexts don't accumulate client ban score */ 3039 if (!bannable) 3040 return; 3041 3042 if (banned) { 3043 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3044 ctx->name, atomic_read(&ctx->guilty_count), 3045 score); 3046 i915_gem_context_set_banned(ctx); 3047 } 3048 3049 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3050 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3051 } 3052 3053 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3054 { 3055 atomic_inc(&ctx->active_count); 3056 } 3057 3058 struct i915_request * 3059 i915_gem_find_active_request(struct intel_engine_cs *engine) 3060 { 3061 struct i915_request *request, *active = NULL; 3062 unsigned long flags; 3063 3064 /* 3065 * We are called by the error capture, reset and to dump engine 3066 * state at random points in time. In particular, note that neither is 3067 * crucially ordered with an interrupt. After a hang, the GPU is dead 3068 * and we assume that no more writes can happen (we waited long enough 3069 * for all writes that were in transaction to be flushed) - adding an 3070 * extra delay for a recent interrupt is pointless. Hence, we do 3071 * not need an engine->irq_seqno_barrier() before the seqno reads. 3072 * At all other times, we must assume the GPU is still running, but 3073 * we only care about the snapshot of this moment. 3074 */ 3075 spin_lock_irqsave(&engine->timeline.lock, flags); 3076 list_for_each_entry(request, &engine->timeline.requests, link) { 3077 if (__i915_request_completed(request, request->global_seqno)) 3078 continue; 3079 3080 active = request; 3081 break; 3082 } 3083 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3084 3085 return active; 3086 } 3087 3088 /* 3089 * Ensure irq handler finishes, and not run again. 3090 * Also return the active request so that we only search for it once. 3091 */ 3092 struct i915_request * 3093 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3094 { 3095 struct i915_request *request; 3096 3097 /* 3098 * During the reset sequence, we must prevent the engine from 3099 * entering RC6. As the context state is undefined until we restart 3100 * the engine, if it does enter RC6 during the reset, the state 3101 * written to the powercontext is undefined and so we may lose 3102 * GPU state upon resume, i.e. fail to restart after a reset. 3103 */ 3104 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3105 3106 request = engine->reset.prepare(engine); 3107 if (request && request->fence.error == -EIO) 3108 request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ 3109 3110 return request; 3111 } 3112 3113 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3114 { 3115 struct intel_engine_cs *engine; 3116 struct i915_request *request; 3117 enum intel_engine_id id; 3118 int err = 0; 3119 3120 for_each_engine(engine, dev_priv, id) { 3121 request = i915_gem_reset_prepare_engine(engine); 3122 if (IS_ERR(request)) { 3123 err = PTR_ERR(request); 3124 continue; 3125 } 3126 3127 engine->hangcheck.active_request = request; 3128 } 3129 3130 i915_gem_revoke_fences(dev_priv); 3131 intel_uc_sanitize(dev_priv); 3132 3133 return err; 3134 } 3135 3136 static void engine_skip_context(struct i915_request *request) 3137 { 3138 struct intel_engine_cs *engine = request->engine; 3139 struct i915_gem_context *hung_ctx = request->gem_context; 3140 struct i915_timeline *timeline = request->timeline; 3141 unsigned long flags; 3142 3143 GEM_BUG_ON(timeline == &engine->timeline); 3144 3145 spin_lock_irqsave(&engine->timeline.lock, flags); 3146 spin_lock(&timeline->lock); 3147 3148 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3149 if (request->gem_context == hung_ctx) 3150 i915_request_skip(request, -EIO); 3151 3152 list_for_each_entry(request, &timeline->requests, link) 3153 i915_request_skip(request, -EIO); 3154 3155 spin_unlock(&timeline->lock); 3156 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3157 } 3158 3159 /* Returns the request if it was guilty of the hang */ 3160 static struct i915_request * 3161 i915_gem_reset_request(struct intel_engine_cs *engine, 3162 struct i915_request *request, 3163 bool stalled) 3164 { 3165 /* The guilty request will get skipped on a hung engine. 3166 * 3167 * Users of client default contexts do not rely on logical 3168 * state preserved between batches so it is safe to execute 3169 * queued requests following the hang. Non default contexts 3170 * rely on preserved state, so skipping a batch loses the 3171 * evolution of the state and it needs to be considered corrupted. 3172 * Executing more queued batches on top of corrupted state is 3173 * risky. But we take the risk by trying to advance through 3174 * the queued requests in order to make the client behaviour 3175 * more predictable around resets, by not throwing away random 3176 * amount of batches it has prepared for execution. Sophisticated 3177 * clients can use gem_reset_stats_ioctl and dma fence status 3178 * (exported via sync_file info ioctl on explicit fences) to observe 3179 * when it loses the context state and should rebuild accordingly. 3180 * 3181 * The context ban, and ultimately the client ban, mechanism are safety 3182 * valves if client submission ends up resulting in nothing more than 3183 * subsequent hangs. 3184 */ 3185 3186 if (i915_request_completed(request)) { 3187 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3188 engine->name, request->global_seqno, 3189 request->fence.context, request->fence.seqno, 3190 intel_engine_get_seqno(engine)); 3191 stalled = false; 3192 } 3193 3194 if (stalled) { 3195 i915_gem_context_mark_guilty(request->gem_context); 3196 i915_request_skip(request, -EIO); 3197 3198 /* If this context is now banned, skip all pending requests. */ 3199 if (i915_gem_context_is_banned(request->gem_context)) 3200 engine_skip_context(request); 3201 } else { 3202 /* 3203 * Since this is not the hung engine, it may have advanced 3204 * since the hang declaration. Double check by refinding 3205 * the active request at the time of the reset. 
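		 * If no incomplete request is found, there is nothing to
		 * replay after the reset.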
		 */
		request = i915_gem_find_active_request(engine);
		if (request) {
			unsigned long flags;

			i915_gem_context_mark_innocent(request->gem_context);
			dma_fence_set_error(&request->fence, -EAGAIN);

			/* Rewind the engine to replay the incomplete rq */
			spin_lock_irqsave(&engine->timeline.lock, flags);
			request = list_prev_entry(request, link);
			if (&request->link == &engine->timeline.requests)
				request = NULL;
			spin_unlock_irqrestore(&engine->timeline.lock, flags);
		}
	}

	return request;
}

void i915_gem_reset_engine(struct intel_engine_cs *engine,
			   struct i915_request *request,
			   bool stalled)
{
	/*
	 * Make sure this write is visible before we re-enable the interrupt
	 * handlers on another CPU, as tasklet_enable() resolves to just
	 * a compiler barrier which is insufficient for our purpose here.
	 */
	smp_store_mb(engine->irq_posted, 0);

	if (request)
		request = i915_gem_reset_request(engine, request, stalled);

	/* Set up the CS to resume from the breadcrumb of the hung request */
	engine->reset.reset(engine, request);
}

void i915_gem_reset(struct drm_i915_private *dev_priv,
		    unsigned int stalled_mask)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	i915_retire_requests(dev_priv);

	for_each_engine(engine, dev_priv, id) {
		struct intel_context *ce;

		i915_gem_reset_engine(engine,
				      engine->hangcheck.active_request,
				      stalled_mask & ENGINE_MASK(id));
		ce = fetch_and_zero(&engine->last_retired_context);
		if (ce)
			intel_context_unpin(ce);

		/*
		 * Ostensibly, we always want a context loaded for powersaving,
		 * so if the engine is idle after the reset, send a request
		 * to load our scratch kernel_context.
		 *
		 * More mysteriously, if we leave the engine idle after a reset,
		 * the next userspace batch may hang, with what appears to be
		 * an incoherent read by the CS (presumably stale TLB). An
		 * empty request appears sufficient to paper over the glitch.
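		 * (If allocating that empty request fails, we simply skip it;
		 * it is only best-effort.)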
3273 */ 3274 if (intel_engine_is_idle(engine)) { 3275 struct i915_request *rq; 3276 3277 rq = i915_request_alloc(engine, 3278 dev_priv->kernel_context); 3279 if (!IS_ERR(rq)) 3280 i915_request_add(rq); 3281 } 3282 } 3283 3284 i915_gem_restore_fences(dev_priv); 3285 } 3286 3287 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3288 { 3289 engine->reset.finish(engine); 3290 3291 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3292 } 3293 3294 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3295 { 3296 struct intel_engine_cs *engine; 3297 enum intel_engine_id id; 3298 3299 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3300 3301 for_each_engine(engine, dev_priv, id) { 3302 engine->hangcheck.active_request = NULL; 3303 i915_gem_reset_finish_engine(engine); 3304 } 3305 } 3306 3307 static void nop_submit_request(struct i915_request *request) 3308 { 3309 unsigned long flags; 3310 3311 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3312 request->engine->name, 3313 request->fence.context, request->fence.seqno); 3314 dma_fence_set_error(&request->fence, -EIO); 3315 3316 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3317 __i915_request_submit(request); 3318 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3319 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3320 } 3321 3322 void i915_gem_set_wedged(struct drm_i915_private *i915) 3323 { 3324 struct intel_engine_cs *engine; 3325 enum intel_engine_id id; 3326 3327 GEM_TRACE("start\n"); 3328 3329 if (GEM_SHOW_DEBUG()) { 3330 struct drm_printer p = drm_debug_printer(__func__); 3331 3332 for_each_engine(engine, i915, id) 3333 intel_engine_dump(engine, &p, "%s\n", engine->name); 3334 } 3335 3336 if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) 3337 goto out; 3338 3339 /* 3340 * First, stop submission to hw, but do not yet complete requests by 3341 * rolling the global seqno forward (since this would complete requests 3342 * for which we haven't set the fence error to EIO yet). 3343 */ 3344 for_each_engine(engine, i915, id) 3345 i915_gem_reset_prepare_engine(engine); 3346 3347 /* Even if the GPU reset fails, it should still stop the engines */ 3348 if (INTEL_GEN(i915) >= 5) 3349 intel_gpu_reset(i915, ALL_ENGINES); 3350 3351 for_each_engine(engine, i915, id) { 3352 engine->submit_request = nop_submit_request; 3353 engine->schedule = NULL; 3354 } 3355 i915->caps.scheduler = 0; 3356 3357 /* 3358 * Make sure no request can slip through without getting completed by 3359 * either this call here to intel_engine_init_global_seqno, or the one 3360 * in nop_submit_request. 3361 */ 3362 synchronize_rcu(); 3363 3364 /* Mark all executing requests as skipped */ 3365 for_each_engine(engine, i915, id) 3366 engine->cancel_requests(engine); 3367 3368 for_each_engine(engine, i915, id) { 3369 i915_gem_reset_finish_engine(engine); 3370 intel_engine_wakeup(engine); 3371 } 3372 3373 out: 3374 GEM_TRACE("end\n"); 3375 3376 wake_up_all(&i915->gpu_error.reset_queue); 3377 } 3378 3379 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3380 { 3381 struct i915_timeline *tl; 3382 3383 lockdep_assert_held(&i915->drm.struct_mutex); 3384 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3385 return true; 3386 3387 GEM_TRACE("start\n"); 3388 3389 /* 3390 * Before unwedging, make sure that all pending operations 3391 * are flushed and errored out - we may have requests waiting upon 3392 * third party fences. 
We marked all inflight requests as EIO, and 3393 * every execbuf since returned EIO, for consistency we want all 3394 * the currently pending requests to also be marked as EIO, which 3395 * is done inside our nop_submit_request - and so we must wait. 3396 * 3397 * No more can be submitted until we reset the wedged bit. 3398 */ 3399 list_for_each_entry(tl, &i915->gt.timelines, link) { 3400 struct i915_request *rq; 3401 3402 rq = i915_gem_active_peek(&tl->last_request, 3403 &i915->drm.struct_mutex); 3404 if (!rq) 3405 continue; 3406 3407 /* 3408 * We can't use our normal waiter as we want to 3409 * avoid recursively trying to handle the current 3410 * reset. The basic dma_fence_default_wait() installs 3411 * a callback for dma_fence_signal(), which is 3412 * triggered by our nop handler (indirectly, the 3413 * callback enables the signaler thread which is 3414 * woken by the nop_submit_request() advancing the seqno 3415 * and when the seqno passes the fence, the signaler 3416 * then signals the fence waking us up). 3417 */ 3418 if (dma_fence_default_wait(&rq->fence, true, 3419 MAX_SCHEDULE_TIMEOUT) < 0) 3420 return false; 3421 } 3422 i915_retire_requests(i915); 3423 GEM_BUG_ON(i915->gt.active_requests); 3424 3425 if (!intel_gpu_reset(i915, ALL_ENGINES)) 3426 intel_engines_sanitize(i915); 3427 3428 /* 3429 * Undo nop_submit_request. We prevent all new i915 requests from 3430 * being queued (by disallowing execbuf whilst wedged) so having 3431 * waited for all active requests above, we know the system is idle 3432 * and do not have to worry about a thread being inside 3433 * engine->submit_request() as we swap over. So unlike installing 3434 * the nop_submit_request on reset, we can do this from normal 3435 * context and do not require stop_machine(). 3436 */ 3437 intel_engines_reset_default_submission(i915); 3438 i915_gem_contexts_lost(i915); 3439 3440 GEM_TRACE("end\n"); 3441 3442 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3443 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3444 3445 return true; 3446 } 3447 3448 static void 3449 i915_gem_retire_work_handler(struct work_struct *work) 3450 { 3451 struct drm_i915_private *dev_priv = 3452 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3453 struct drm_device *dev = &dev_priv->drm; 3454 3455 /* Come back later if the device is busy... */ 3456 if (mutex_trylock(&dev->struct_mutex)) { 3457 i915_retire_requests(dev_priv); 3458 mutex_unlock(&dev->struct_mutex); 3459 } 3460 3461 /* 3462 * Keep the retire handler running until we are finally idle. 3463 * We do not need to do this test under locking as in the worst-case 3464 * we queue the retire worker once too often. 3465 */ 3466 if (READ_ONCE(dev_priv->gt.awake)) 3467 queue_delayed_work(dev_priv->wq, 3468 &dev_priv->gt.retire_work, 3469 round_jiffies_up_relative(HZ)); 3470 } 3471 3472 static void shrink_caches(struct drm_i915_private *i915) 3473 { 3474 /* 3475 * kmem_cache_shrink() discards empty slabs and reorders partially 3476 * filled slabs to prioritise allocating from the mostly full slabs, 3477 * with the aim of reducing fragmentation. 
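	 * We only bother doing so once we are otherwise idle and there is no
	 * other work pending, see __sleep_work() below.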
3478 */ 3479 kmem_cache_shrink(i915->priorities); 3480 kmem_cache_shrink(i915->dependencies); 3481 kmem_cache_shrink(i915->requests); 3482 kmem_cache_shrink(i915->luts); 3483 kmem_cache_shrink(i915->vmas); 3484 kmem_cache_shrink(i915->objects); 3485 } 3486 3487 struct sleep_rcu_work { 3488 union { 3489 struct rcu_head rcu; 3490 struct work_struct work; 3491 }; 3492 struct drm_i915_private *i915; 3493 unsigned int epoch; 3494 }; 3495 3496 static inline bool 3497 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3498 { 3499 /* 3500 * There is a small chance that the epoch wrapped since we started 3501 * sleeping. If we assume that epoch is at least a u32, then it will 3502 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3503 */ 3504 return epoch == READ_ONCE(i915->gt.epoch); 3505 } 3506 3507 static void __sleep_work(struct work_struct *work) 3508 { 3509 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3510 struct drm_i915_private *i915 = s->i915; 3511 unsigned int epoch = s->epoch; 3512 3513 kfree(s); 3514 if (same_epoch(i915, epoch)) 3515 shrink_caches(i915); 3516 } 3517 3518 static void __sleep_rcu(struct rcu_head *rcu) 3519 { 3520 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3521 struct drm_i915_private *i915 = s->i915; 3522 3523 destroy_rcu_head(&s->rcu); 3524 3525 if (same_epoch(i915, s->epoch)) { 3526 INIT_WORK(&s->work, __sleep_work); 3527 queue_work(i915->wq, &s->work); 3528 } else { 3529 kfree(s); 3530 } 3531 } 3532 3533 static inline bool 3534 new_requests_since_last_retire(const struct drm_i915_private *i915) 3535 { 3536 return (READ_ONCE(i915->gt.active_requests) || 3537 work_pending(&i915->gt.idle_work.work)); 3538 } 3539 3540 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3541 { 3542 struct intel_engine_cs *engine; 3543 enum intel_engine_id id; 3544 3545 if (i915_terminally_wedged(&i915->gpu_error)) 3546 return; 3547 3548 GEM_BUG_ON(i915->gt.active_requests); 3549 for_each_engine(engine, i915, id) { 3550 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3551 GEM_BUG_ON(engine->last_retired_context != 3552 to_intel_context(i915->kernel_context, engine)); 3553 } 3554 } 3555 3556 static void 3557 i915_gem_idle_work_handler(struct work_struct *work) 3558 { 3559 struct drm_i915_private *dev_priv = 3560 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3561 unsigned int epoch = I915_EPOCH_INVALID; 3562 bool rearm_hangcheck; 3563 3564 if (!READ_ONCE(dev_priv->gt.awake)) 3565 return; 3566 3567 if (READ_ONCE(dev_priv->gt.active_requests)) 3568 return; 3569 3570 /* 3571 * Flush out the last user context, leaving only the pinned 3572 * kernel context resident. When we are idling on the kernel_context, 3573 * no more new requests (with a context switch) are emitted and we 3574 * can finally rest. A consequence is that the idle work handler is 3575 * always called at least twice before idling (and if the system is 3576 * idle that implies a round trip through the retire worker). 3577 */ 3578 mutex_lock(&dev_priv->drm.struct_mutex); 3579 i915_gem_switch_to_kernel_context(dev_priv); 3580 mutex_unlock(&dev_priv->drm.struct_mutex); 3581 3582 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3583 READ_ONCE(dev_priv->gt.active_requests)); 3584 3585 /* 3586 * Wait for last execlists context complete, but bail out in case a 3587 * new request is submitted. As we don't trust the hardware, we 3588 * continue on if the wait times out. 
This is necessary to allow
	 * the machine to suspend even if the hardware dies, and we will
	 * try to recover in resume (after depriving the hardware of power,
	 * it may be in a better mood).
	 */
	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
		   intel_engines_are_idle(dev_priv),
		   I915_IDLE_ENGINES_TIMEOUT * 1000,
		   10, 500);

	rearm_hangcheck =
		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);

	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
		/* Currently busy, come back later */
		mod_delayed_work(dev_priv->wq,
				 &dev_priv->gt.idle_work,
				 msecs_to_jiffies(50));
		goto out_rearm;
	}

	/*
	 * New request retired after this work handler started, extend active
	 * period until next instance of the work.
	 */
	if (new_requests_since_last_retire(dev_priv))
		goto out_unlock;

	epoch = __i915_gem_park(dev_priv);

	assert_kernel_context_is_current(dev_priv);

	rearm_hangcheck = false;
out_unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);

out_rearm:
	if (rearm_hangcheck) {
		GEM_BUG_ON(!dev_priv->gt.awake);
		i915_queue_hangcheck(dev_priv);
	}

	/*
	 * When we are idle, it is an opportune time to reap our caches.
	 * However, we have many objects that utilise RCU and the ordered
	 * i915->wq that this work is executing on. To try and flush any
	 * pending frees now we are idle, we first wait for an RCU grace
	 * period, and then queue a task (that will run last on the wq) to
	 * shrink and re-optimize the caches.
	 */
	if (same_epoch(dev_priv, epoch)) {
		struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
		if (s) {
			init_rcu_head(&s->rcu);
			s->i915 = dev_priv;
			s->epoch = epoch;
			call_rcu(&s->rcu, __sleep_rcu);
		}
	}
}

void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(gem->dev);
	struct drm_i915_gem_object *obj = to_intel_bo(gem);
	struct drm_i915_file_private *fpriv = file->driver_priv;
	struct i915_lut_handle *lut, *ln;

	mutex_lock(&i915->drm.struct_mutex);

	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;
		struct i915_vma *vma;

		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
		if (ctx->file_priv != fpriv)
			continue;

		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
		GEM_BUG_ON(vma->obj != obj);

		/* We allow the process to have multiple handles to the same
		 * vma, in the same fd namespace, by virtue of flink/open.
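		 * Hence the open_count: the vma is only closed once the last
		 * handle referencing it in this file has been dropped.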
		 */
		GEM_BUG_ON(!vma->open_count);
		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
			i915_vma_close(vma);

		list_del(&lut->obj_link);
		list_del(&lut->ctx_link);

		kmem_cache_free(i915->luts, lut);
		__i915_gem_object_release_unless_active(obj);
	}

	mutex_unlock(&i915->drm.struct_mutex);
}

static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: incomplete, restart syscall
 * -ENOMEM: damn
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static long wait_for_timeline(struct i915_timeline *tl,
			      unsigned int flags, long timeout)
{
	struct i915_request *rq;

	rq = i915_gem_active_get_unlocked(&tl->last_request);
	if (!rq)
		return timeout;

	/*
	 * "Race-to-idle".
	 *
	 * Switching to the kernel context is often used as a synchronous
	 * step prior to idling, e.g. in suspend for flushing all
	 * current operations to memory before sleeping. These we
	 * want to complete as quickly as possible to avoid prolonged
	 * stalls, so allow the gpu to boost to maximum clocks.
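	 * (The boost is only applied when the caller passes
	 * I915_WAIT_FOR_IDLE_BOOST, e.g. from the suspend path.)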
	 */
	if (flags & I915_WAIT_FOR_IDLE_BOOST)
		gen6_rps_boost(rq, NULL);

	timeout = i915_request_wait(rq, flags, timeout);
	i915_request_put(rq);

	return timeout;
}

static int wait_for_engines(struct drm_i915_private *i915)
{
	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
		dev_err(i915->drm.dev,
			"Failed to idle engines, declaring wedged!\n");
		GEM_TRACE_DUMP();
		i915_gem_set_wedged(i915);
		return -EIO;
	}

	return 0;
}

int i915_gem_wait_for_idle(struct drm_i915_private *i915,
			   unsigned int flags, long timeout)
{
	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");

	/* If the device is asleep, we have no requests outstanding */
	if (!READ_ONCE(i915->gt.awake))
		return 0;

	if (flags & I915_WAIT_LOCKED) {
		struct i915_timeline *tl;
		int err;

		lockdep_assert_held(&i915->drm.struct_mutex);

		list_for_each_entry(tl, &i915->gt.timelines, link) {
			timeout = wait_for_timeline(tl, flags, timeout);
			if (timeout < 0)
				return timeout;
		}
		if (GEM_SHOW_DEBUG() && !timeout) {
			/* Presume that timeout was non-zero to begin with! */
			dev_warn(&i915->drm.pdev->dev,
				 "Missed idle-completion interrupt!\n");
			GEM_TRACE_DUMP();
		}

		err = wait_for_engines(i915);
		if (err)
			return err;

		i915_retire_requests(i915);
		GEM_BUG_ON(i915->gt.active_requests);
	} else {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, i915, id) {
			struct i915_timeline *tl = &engine->timeline;

			timeout = wait_for_timeline(tl, flags, timeout);
			if (timeout < 0)
				return timeout;
		}
	}

	return 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!READ_ONCE(obj->pin_global))
		return;

	mutex_lock(&obj->base.dev->struct_mutex);
	__i915_gem_object_flush_for_display(obj);
	mutex_unlock(&obj->base.dev->struct_mutex);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   (write ?
I915_WAIT_ALL : 0), 3901 MAX_SCHEDULE_TIMEOUT, 3902 NULL); 3903 if (ret) 3904 return ret; 3905 3906 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3907 return 0; 3908 3909 /* Flush and acquire obj->pages so that we are coherent through 3910 * direct access in memory with previous cached writes through 3911 * shmemfs and that our cache domain tracking remains valid. 3912 * For example, if the obj->filp was moved to swap without us 3913 * being notified and releasing the pages, we would mistakenly 3914 * continue to assume that the obj remained out of the CPU cached 3915 * domain. 3916 */ 3917 ret = i915_gem_object_pin_pages(obj); 3918 if (ret) 3919 return ret; 3920 3921 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3922 3923 /* Serialise direct access to this object with the barriers for 3924 * coherent writes from the GPU, by effectively invalidating the 3925 * WC domain upon first access. 3926 */ 3927 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3928 mb(); 3929 3930 /* It should now be out of any other write domains, and we can update 3931 * the domain values for our changes. 3932 */ 3933 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3934 obj->read_domains |= I915_GEM_DOMAIN_WC; 3935 if (write) { 3936 obj->read_domains = I915_GEM_DOMAIN_WC; 3937 obj->write_domain = I915_GEM_DOMAIN_WC; 3938 obj->mm.dirty = true; 3939 } 3940 3941 i915_gem_object_unpin_pages(obj); 3942 return 0; 3943 } 3944 3945 /** 3946 * Moves a single object to the GTT read, and possibly write domain. 3947 * @obj: object to act on 3948 * @write: ask for write access or read only 3949 * 3950 * This function returns when the move is complete, including waiting on 3951 * flushes to occur. 3952 */ 3953 int 3954 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3955 { 3956 int ret; 3957 3958 lockdep_assert_held(&obj->base.dev->struct_mutex); 3959 3960 ret = i915_gem_object_wait(obj, 3961 I915_WAIT_INTERRUPTIBLE | 3962 I915_WAIT_LOCKED | 3963 (write ? I915_WAIT_ALL : 0), 3964 MAX_SCHEDULE_TIMEOUT, 3965 NULL); 3966 if (ret) 3967 return ret; 3968 3969 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3970 return 0; 3971 3972 /* Flush and acquire obj->pages so that we are coherent through 3973 * direct access in memory with previous cached writes through 3974 * shmemfs and that our cache domain tracking remains valid. 3975 * For example, if the obj->filp was moved to swap without us 3976 * being notified and releasing the pages, we would mistakenly 3977 * continue to assume that the obj remained out of the CPU cached 3978 * domain. 3979 */ 3980 ret = i915_gem_object_pin_pages(obj); 3981 if (ret) 3982 return ret; 3983 3984 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3985 3986 /* Serialise direct access to this object with the barriers for 3987 * coherent writes from the GPU, by effectively invalidating the 3988 * GTT domain upon first access. 3989 */ 3990 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3991 mb(); 3992 3993 /* It should now be out of any other write domains, and we can update 3994 * the domain values for our changes. 3995 */ 3996 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3997 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3998 if (write) { 3999 obj->read_domains = I915_GEM_DOMAIN_GTT; 4000 obj->write_domain = I915_GEM_DOMAIN_GTT; 4001 obj->mm.dirty = true; 4002 } 4003 4004 i915_gem_object_unpin_pages(obj); 4005 return 0; 4006 } 4007 4008 /** 4009 * Changes the cache-level of an object across all VMA. 
4010 * @obj: object to act on 4011 * @cache_level: new cache level to set for the object 4012 * 4013 * After this function returns, the object will be in the new cache-level 4014 * across all GTT and the contents of the backing storage will be coherent, 4015 * with respect to the new cache-level. In order to keep the backing storage 4016 * coherent for all users, we only allow a single cache level to be set 4017 * globally on the object and prevent it from being changed whilst the 4018 * hardware is reading from the object. That is, if the object is currently 4019 * on the scanout, it will be set to uncached (or equivalent display 4020 * cache coherency) and all non-MOCS GPU access will also be uncached so 4021 * that all direct access to the scanout remains coherent. 4022 */ 4023 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4024 enum i915_cache_level cache_level) 4025 { 4026 struct i915_vma *vma; 4027 int ret; 4028 4029 lockdep_assert_held(&obj->base.dev->struct_mutex); 4030 4031 if (obj->cache_level == cache_level) 4032 return 0; 4033 4034 /* Inspect the list of currently bound VMA and unbind any that would 4035 * be invalid given the new cache-level. This is principally to 4036 * catch the issue of the CS prefetch crossing page boundaries and 4037 * reading an invalid PTE on older architectures. 4038 */ 4039 restart: 4040 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4041 if (!drm_mm_node_allocated(&vma->node)) 4042 continue; 4043 4044 if (i915_vma_is_pinned(vma)) { 4045 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4046 return -EBUSY; 4047 } 4048 4049 if (!i915_vma_is_closed(vma) && 4050 i915_gem_valid_gtt_space(vma, cache_level)) 4051 continue; 4052 4053 ret = i915_vma_unbind(vma); 4054 if (ret) 4055 return ret; 4056 4057 /* As unbinding may affect other elements in the 4058 * obj->vma_list (due to side-effects from retiring 4059 * an active vma), play safe and restart the iterator. 4060 */ 4061 goto restart; 4062 } 4063 4064 /* We can reuse the existing drm_mm nodes but need to change the 4065 * cache-level on the PTE. We could simply unbind them all and 4066 * rebind with the correct cache-level on next use. However, since 4067 * we already have a valid slot, dma mapping, pages etc, we may as well 4068 * rewrite the PTE in the belief that doing so tramples upon less 4069 * state and so involves less work. 4070 */ 4071 if (obj->bind_count) { 4072 /* Before we change the PTE, the GPU must not be accessing it. 4073 * If we wait upon the object, we know that all the bound 4074 * VMA are no longer active. 4075 */ 4076 ret = i915_gem_object_wait(obj, 4077 I915_WAIT_INTERRUPTIBLE | 4078 I915_WAIT_LOCKED | 4079 I915_WAIT_ALL, 4080 MAX_SCHEDULE_TIMEOUT, 4081 NULL); 4082 if (ret) 4083 return ret; 4084 4085 if (!HAS_LLC(to_i915(obj->base.dev)) && 4086 cache_level != I915_CACHE_NONE) { 4087 /* Access to snoopable pages through the GTT is 4088 * incoherent and on some machines causes a hard 4089 * lockup. Relinquish the CPU mmapping to force 4090 * userspace to refault in the pages and we can 4091 * then double check if the GTT mapping is still 4092 * valid for that pointer access. 4093 */ 4094 i915_gem_release_mmap(obj); 4095 4096 /* As we no longer need a fence for GTT access, 4097 * we can relinquish it now (and so prevent having 4098 * to steal a fence from someone else on the next 4099 * fence request). Note GPU activity would have 4100 * dropped the fence as all snoopable access is 4101 * supposed to be linear.
4102 */ 4103 for_each_ggtt_vma(vma, obj) { 4104 ret = i915_vma_put_fence(vma); 4105 if (ret) 4106 return ret; 4107 } 4108 } else { 4109 /* We either have incoherent backing store and 4110 * so no GTT access or the architecture is fully 4111 * coherent. In such cases, existing GTT mmaps 4112 * ignore the cache bit in the PTE and we can 4113 * rewrite it without confusing the GPU or having 4114 * to force userspace to fault back in its mmaps. 4115 */ 4116 } 4117 4118 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4119 if (!drm_mm_node_allocated(&vma->node)) 4120 continue; 4121 4122 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4123 if (ret) 4124 return ret; 4125 } 4126 } 4127 4128 list_for_each_entry(vma, &obj->vma_list, obj_link) 4129 vma->node.color = cache_level; 4130 i915_gem_object_set_cache_coherency(obj, cache_level); 4131 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4132 4133 return 0; 4134 } 4135 4136 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4137 struct drm_file *file) 4138 { 4139 struct drm_i915_gem_caching *args = data; 4140 struct drm_i915_gem_object *obj; 4141 int err = 0; 4142 4143 rcu_read_lock(); 4144 obj = i915_gem_object_lookup_rcu(file, args->handle); 4145 if (!obj) { 4146 err = -ENOENT; 4147 goto out; 4148 } 4149 4150 switch (obj->cache_level) { 4151 case I915_CACHE_LLC: 4152 case I915_CACHE_L3_LLC: 4153 args->caching = I915_CACHING_CACHED; 4154 break; 4155 4156 case I915_CACHE_WT: 4157 args->caching = I915_CACHING_DISPLAY; 4158 break; 4159 4160 default: 4161 args->caching = I915_CACHING_NONE; 4162 break; 4163 } 4164 out: 4165 rcu_read_unlock(); 4166 return err; 4167 } 4168 4169 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4170 struct drm_file *file) 4171 { 4172 struct drm_i915_private *i915 = to_i915(dev); 4173 struct drm_i915_gem_caching *args = data; 4174 struct drm_i915_gem_object *obj; 4175 enum i915_cache_level level; 4176 int ret = 0; 4177 4178 switch (args->caching) { 4179 case I915_CACHING_NONE: 4180 level = I915_CACHE_NONE; 4181 break; 4182 case I915_CACHING_CACHED: 4183 /* 4184 * Due to a HW issue on BXT A stepping, GPU stores via a 4185 * snooped mapping may leave stale data in a corresponding CPU 4186 * cacheline, whereas normally such cachelines would get 4187 * invalidated. 4188 */ 4189 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4190 return -ENODEV; 4191 4192 level = I915_CACHE_LLC; 4193 break; 4194 case I915_CACHING_DISPLAY: 4195 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4196 break; 4197 default: 4198 return -EINVAL; 4199 } 4200 4201 obj = i915_gem_object_lookup(file, args->handle); 4202 if (!obj) 4203 return -ENOENT; 4204 4205 /* 4206 * The caching mode of proxy object is handled by its generator, and 4207 * not allowed to be changed by userspace. 4208 */ 4209 if (i915_gem_object_is_proxy(obj)) { 4210 ret = -ENXIO; 4211 goto out; 4212 } 4213 4214 if (obj->cache_level == level) 4215 goto out; 4216 4217 ret = i915_gem_object_wait(obj, 4218 I915_WAIT_INTERRUPTIBLE, 4219 MAX_SCHEDULE_TIMEOUT, 4220 to_rps_client(file)); 4221 if (ret) 4222 goto out; 4223 4224 ret = i915_mutex_lock_interruptible(dev); 4225 if (ret) 4226 goto out; 4227 4228 ret = i915_gem_object_set_cache_level(obj, level); 4229 mutex_unlock(&dev->struct_mutex); 4230 4231 out: 4232 i915_gem_object_put(obj); 4233 return ret; 4234 } 4235 4236 /* 4237 * Prepare buffer for display plane (scanout, cursors, etc). 
Can be called from 4238 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4239 * (for pageflips). We only flush the caches while preparing the buffer for 4240 * display; the callers are responsible for the frontbuffer flush. 4241 */ 4242 struct i915_vma * 4243 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4244 u32 alignment, 4245 const struct i915_ggtt_view *view, 4246 unsigned int flags) 4247 { 4248 struct i915_vma *vma; 4249 int ret; 4250 4251 lockdep_assert_held(&obj->base.dev->struct_mutex); 4252 4253 /* Mark the global pin early so that we account for the 4254 * display coherency whilst setting up the cache domains. 4255 */ 4256 obj->pin_global++; 4257 4258 /* The display engine is not coherent with the LLC cache on gen6. As 4259 * a result, we make sure that the pinning that is about to occur is 4260 * done with uncached PTEs. This is the lowest common denominator for all 4261 * chipsets. 4262 * 4263 * However for gen6+, we could do better by using the GFDT bit instead 4264 * of uncaching, which would allow us to flush all the LLC-cached data 4265 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4266 */ 4267 ret = i915_gem_object_set_cache_level(obj, 4268 HAS_WT(to_i915(obj->base.dev)) ? 4269 I915_CACHE_WT : I915_CACHE_NONE); 4270 if (ret) { 4271 vma = ERR_PTR(ret); 4272 goto err_unpin_global; 4273 } 4274 4275 /* As the user may map the buffer once pinned in the display plane 4276 * (e.g. libkms for the bootup splash), we have to ensure that we 4277 * always use map_and_fenceable for all scanout buffers. However, 4278 * it may simply be too big to fit into mappable, in which case 4279 * put it anyway and hope that userspace can cope (but always first 4280 * try to preserve the existing ABI). 4281 */ 4282 vma = ERR_PTR(-ENOSPC); 4283 if ((flags & PIN_MAPPABLE) == 0 && 4284 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4285 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4286 flags | 4287 PIN_MAPPABLE | 4288 PIN_NONBLOCK); 4289 if (IS_ERR(vma)) 4290 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4291 if (IS_ERR(vma)) 4292 goto err_unpin_global; 4293 4294 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4295 4296 __i915_gem_object_flush_for_display(obj); 4297 4298 /* It should now be out of any other write domains, and we can update 4299 * the domain values for our changes. 4300 */ 4301 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4302 4303 return vma; 4304 4305 err_unpin_global: 4306 obj->pin_global--; 4307 return vma; 4308 } 4309 4310 void 4311 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4312 { 4313 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4314 4315 if (WARN_ON(vma->obj->pin_global == 0)) 4316 return; 4317 4318 if (--vma->obj->pin_global == 0) 4319 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4320 4321 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4322 i915_gem_object_bump_inactive_ggtt(vma->obj); 4323 4324 i915_vma_unpin(vma); 4325 } 4326 4327 /** 4328 * Moves a single object to the CPU read, and possibly write domain. 4329 * @obj: object to act on 4330 * @write: requesting write or read-only access 4331 * 4332 * This function returns when the move is complete, including waiting on 4333 * flushes to occur.
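 *
 * A minimal caller sketch (this mirrors i915_gem_freeze_late() later in
 * this file, which moves every bound and unbound object into the CPU
 * domain before the hibernation image is written):
 *
 *	lockdep_assert_held(&obj->base.dev->struct_mutex);
 *
 *	err = i915_gem_object_set_to_cpu_domain(obj, true);
 *	if (err)
 *		return err;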
4334 */ 4335 int 4336 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4337 { 4338 int ret; 4339 4340 lockdep_assert_held(&obj->base.dev->struct_mutex); 4341 4342 ret = i915_gem_object_wait(obj, 4343 I915_WAIT_INTERRUPTIBLE | 4344 I915_WAIT_LOCKED | 4345 (write ? I915_WAIT_ALL : 0), 4346 MAX_SCHEDULE_TIMEOUT, 4347 NULL); 4348 if (ret) 4349 return ret; 4350 4351 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4352 4353 /* Flush the CPU cache if it's still invalid. */ 4354 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4355 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4356 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4357 } 4358 4359 /* It should now be out of any other write domains, and we can update 4360 * the domain values for our changes. 4361 */ 4362 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4363 4364 /* If we're writing through the CPU, then the GPU read domains will 4365 * need to be invalidated at next use. 4366 */ 4367 if (write) 4368 __start_cpu_write(obj); 4369 4370 return 0; 4371 } 4372 4373 /* Throttle our rendering by waiting until the ring has completed our requests 4374 * emitted over 20 msec ago. 4375 * 4376 * Note that if we were to use the current jiffies each time around the loop, 4377 * we wouldn't escape the function with any frames outstanding if the time to 4378 * render a frame was over 20ms. 4379 * 4380 * This should get us reasonable parallelism between CPU and GPU but also 4381 * relatively low latency when blocking on a particular request to finish. 4382 */ 4383 static int 4384 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4385 { 4386 struct drm_i915_private *dev_priv = to_i915(dev); 4387 struct drm_i915_file_private *file_priv = file->driver_priv; 4388 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4389 struct i915_request *request, *target = NULL; 4390 long ret; 4391 4392 /* ABI: return -EIO if already wedged */ 4393 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4394 return -EIO; 4395 4396 spin_lock(&file_priv->mm.lock); 4397 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4398 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4399 break; 4400 4401 if (target) { 4402 list_del(&target->client_link); 4403 target->file_priv = NULL; 4404 } 4405 4406 target = request; 4407 } 4408 if (target) 4409 i915_request_get(target); 4410 spin_unlock(&file_priv->mm.lock); 4411 4412 if (target == NULL) 4413 return 0; 4414 4415 ret = i915_request_wait(target, 4416 I915_WAIT_INTERRUPTIBLE, 4417 MAX_SCHEDULE_TIMEOUT); 4418 i915_request_put(target); 4419 4420 return ret < 0 ? ret : 0; 4421 } 4422 4423 struct i915_vma * 4424 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4425 const struct i915_ggtt_view *view, 4426 u64 size, 4427 u64 alignment, 4428 u64 flags) 4429 { 4430 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4431 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4432 struct i915_vma *vma; 4433 int ret; 4434 4435 lockdep_assert_held(&obj->base.dev->struct_mutex); 4436 4437 if (flags & PIN_MAPPABLE && 4438 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4439 /* If the required space is larger than the available 4440 * aperture, we will not be able to find a slot for the 4441 * object and unbinding the object now will be in 4442 * vain. Worse, doing so may cause us to ping-pong 4443 * the object in and out of the Global GTT and 4444 * waste a lot of cycles under the mutex.
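 *
 * As a worked example (sizes are illustrative only): with a
 * 256 MiB mappable aperture, a 512 MiB object can never fit,
 * so the check below fails the pin immediately with -E2BIG
 * rather than evicting other objects to no avail.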
4445 */ 4446 if (obj->base.size > dev_priv->ggtt.mappable_end) 4447 return ERR_PTR(-E2BIG); 4448 4449 /* If NONBLOCK is set the caller is optimistically 4450 * trying to cache the full object within the mappable 4451 * aperture, and *must* have a fallback in place for 4452 * situations where we cannot bind the object. We 4453 * can be a little more lax here and use the fallback 4454 * more often to avoid costly migrations of ourselves 4455 * and other objects within the aperture. 4456 * 4457 * Half-the-aperture is used as a simple heuristic. 4458 * More interesting would be to do a search for a free 4459 * block prior to making the commitment to unbind. 4460 * That caters for the self-harm case, and with a 4461 * little more heuristics (e.g. NOFAULT, NOEVICT) 4462 * we could try to minimise harm to others. 4463 */ 4464 if (flags & PIN_NONBLOCK && 4465 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4466 return ERR_PTR(-ENOSPC); 4467 } 4468 4469 vma = i915_vma_instance(obj, vm, view); 4470 if (unlikely(IS_ERR(vma))) 4471 return vma; 4472 4473 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4474 if (flags & PIN_NONBLOCK) { 4475 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4476 return ERR_PTR(-ENOSPC); 4477 4478 if (flags & PIN_MAPPABLE && 4479 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4480 return ERR_PTR(-ENOSPC); 4481 } 4482 4483 WARN(i915_vma_is_pinned(vma), 4484 "bo is already pinned in ggtt with incorrect alignment:" 4485 " offset=%08x, req.alignment=%llx," 4486 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4487 i915_ggtt_offset(vma), alignment, 4488 !!(flags & PIN_MAPPABLE), 4489 i915_vma_is_map_and_fenceable(vma)); 4490 ret = i915_vma_unbind(vma); 4491 if (ret) 4492 return ERR_PTR(ret); 4493 } 4494 4495 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4496 if (ret) 4497 return ERR_PTR(ret); 4498 4499 return vma; 4500 } 4501 4502 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4503 { 4504 /* Note that we could alias engines in the execbuf API, but 4505 * that would be very unwise as it prevents userspace from 4506 * exercising fine control over engine selection. Ahem. 4507 * 4508 * This should be something like EXEC_MAX_ENGINE instead of 4509 * I915_NUM_ENGINES. 4510 */ 4511 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4512 return 0x10000 << id; 4513 } 4514 4515 static __always_inline unsigned int __busy_write_id(unsigned int id) 4516 { 4517 /* The uABI guarantees an active writer is also amongst the read 4518 * engines. This would be true if we accessed the activity tracking 4519 * under the lock, but as we perform the lookup of the object and 4520 * its activity locklessly we can not guarantee that the last_write 4521 * being active implies that we have set the same engine flag from 4522 * last_read - hence we always set both read and write busy for 4523 * last_write. 4524 */ 4525 return id | __busy_read_flag(id); 4526 } 4527 4528 static __always_inline unsigned int 4529 __busy_set_if_active(const struct dma_fence *fence, 4530 unsigned int (*flag)(unsigned int id)) 4531 { 4532 struct i915_request *rq; 4533 4534 /* We have to check the current hw status of the fence as the uABI 4535 * guarantees forward progress. We could rely on the idle worker 4536 * to eventually flush us, but to minimise latency just ask the 4537 * hardware. 4538 * 4539 * Note we only report on the status of native fences.
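 *
 * As a worked example of the encoding built by the helpers above
 * (the engine id is illustrative): for uabi_id 2, __busy_read_flag()
 * yields 0x10000 << 2 == 0x40000, while __busy_write_id() yields
 * 2 | 0x40000 == 0x40002, i.e. the writer id in the low word plus
 * the matching read bit in the high word.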
4540 */ 4541 if (!dma_fence_is_i915(fence)) 4542 return 0; 4543 4544 /* opencode to_request() in order to avoid const warnings */ 4545 rq = container_of(fence, struct i915_request, fence); 4546 if (i915_request_completed(rq)) 4547 return 0; 4548 4549 return flag(rq->engine->uabi_id); 4550 } 4551 4552 static __always_inline unsigned int 4553 busy_check_reader(const struct dma_fence *fence) 4554 { 4555 return __busy_set_if_active(fence, __busy_read_flag); 4556 } 4557 4558 static __always_inline unsigned int 4559 busy_check_writer(const struct dma_fence *fence) 4560 { 4561 if (!fence) 4562 return 0; 4563 4564 return __busy_set_if_active(fence, __busy_write_id); 4565 } 4566 4567 int 4568 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4569 struct drm_file *file) 4570 { 4571 struct drm_i915_gem_busy *args = data; 4572 struct drm_i915_gem_object *obj; 4573 struct reservation_object_list *list; 4574 unsigned int seq; 4575 int err; 4576 4577 err = -ENOENT; 4578 rcu_read_lock(); 4579 obj = i915_gem_object_lookup_rcu(file, args->handle); 4580 if (!obj) 4581 goto out; 4582 4583 /* A discrepancy here is that we do not report the status of 4584 * non-i915 fences, i.e. even though we may report the object as idle, 4585 * a call to set-domain may still stall waiting for foreign rendering. 4586 * This also means that wait-ioctl may report an object as busy, 4587 * where busy-ioctl considers it idle. 4588 * 4589 * We trade the ability to warn of foreign fences to report on which 4590 * i915 engines are active for the object. 4591 * 4592 * Alternatively, we can trade that extra information on read/write 4593 * activity with 4594 * args->busy = 4595 * !reservation_object_test_signaled_rcu(obj->resv, true); 4596 * to report the overall busyness. This is what the wait-ioctl does. 
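 *
 * For reference, a userspace-side sketch of decoding the result
 * (illustrative only, but it matches the __busy_read_flag() and
 * __busy_write_id() encoding above):
 *
 *	__u32 busy = args.busy;
 *	__u32 writer_id    = busy & 0xffff; // id of the last writer
 *	__u32 reader_flags = busy >> 16;    // bitmask of reading engines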
4597 * 4598 */ 4599 retry: 4600 seq = raw_read_seqcount(&obj->resv->seq); 4601 4602 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4603 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4604 4605 /* Translate shared fences to READ set of engines */ 4606 list = rcu_dereference(obj->resv->fence); 4607 if (list) { 4608 unsigned int shared_count = list->shared_count, i; 4609 4610 for (i = 0; i < shared_count; ++i) { 4611 struct dma_fence *fence = 4612 rcu_dereference(list->shared[i]); 4613 4614 args->busy |= busy_check_reader(fence); 4615 } 4616 } 4617 4618 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4619 goto retry; 4620 4621 err = 0; 4622 out: 4623 rcu_read_unlock(); 4624 return err; 4625 } 4626 4627 int 4628 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4629 struct drm_file *file_priv) 4630 { 4631 return i915_gem_ring_throttle(dev, file_priv); 4632 } 4633 4634 int 4635 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4636 struct drm_file *file_priv) 4637 { 4638 struct drm_i915_private *dev_priv = to_i915(dev); 4639 struct drm_i915_gem_madvise *args = data; 4640 struct drm_i915_gem_object *obj; 4641 int err; 4642 4643 switch (args->madv) { 4644 case I915_MADV_DONTNEED: 4645 case I915_MADV_WILLNEED: 4646 break; 4647 default: 4648 return -EINVAL; 4649 } 4650 4651 obj = i915_gem_object_lookup(file_priv, args->handle); 4652 if (!obj) 4653 return -ENOENT; 4654 4655 err = mutex_lock_interruptible(&obj->mm.lock); 4656 if (err) 4657 goto out; 4658 4659 if (i915_gem_object_has_pages(obj) && 4660 i915_gem_object_is_tiled(obj) && 4661 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4662 if (obj->mm.madv == I915_MADV_WILLNEED) { 4663 GEM_BUG_ON(!obj->mm.quirked); 4664 __i915_gem_object_unpin_pages(obj); 4665 obj->mm.quirked = false; 4666 } 4667 if (args->madv == I915_MADV_WILLNEED) { 4668 GEM_BUG_ON(obj->mm.quirked); 4669 __i915_gem_object_pin_pages(obj); 4670 obj->mm.quirked = true; 4671 } 4672 } 4673 4674 if (obj->mm.madv != __I915_MADV_PURGED) 4675 obj->mm.madv = args->madv; 4676 4677 /* if the object is no longer attached, discard its backing storage */ 4678 if (obj->mm.madv == I915_MADV_DONTNEED && 4679 !i915_gem_object_has_pages(obj)) 4680 i915_gem_object_truncate(obj); 4681 4682 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4683 mutex_unlock(&obj->mm.lock); 4684 4685 out: 4686 i915_gem_object_put(obj); 4687 return err; 4688 } 4689 4690 static void 4691 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4692 { 4693 struct drm_i915_gem_object *obj = 4694 container_of(active, typeof(*obj), frontbuffer_write); 4695 4696 intel_fb_obj_flush(obj, ORIGIN_CS); 4697 } 4698 4699 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4700 const struct drm_i915_gem_object_ops *ops) 4701 { 4702 mutex_init(&obj->mm.lock); 4703 4704 INIT_LIST_HEAD(&obj->vma_list); 4705 INIT_LIST_HEAD(&obj->lut_list); 4706 INIT_LIST_HEAD(&obj->batch_pool_link); 4707 4708 init_rcu_head(&obj->rcu); 4709 4710 obj->ops = ops; 4711 4712 reservation_object_init(&obj->__builtin_resv); 4713 obj->resv = &obj->__builtin_resv; 4714 4715 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4716 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4717 4718 obj->mm.madv = I915_MADV_WILLNEED; 4719 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4720 mutex_init(&obj->mm.get_page.lock); 4721 4722 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4723 } 4724 4725 static const struct 
drm_i915_gem_object_ops i915_gem_object_ops = { 4726 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4727 I915_GEM_OBJECT_IS_SHRINKABLE, 4728 4729 .get_pages = i915_gem_object_get_pages_gtt, 4730 .put_pages = i915_gem_object_put_pages_gtt, 4731 4732 .pwrite = i915_gem_object_pwrite_gtt, 4733 }; 4734 4735 static int i915_gem_object_create_shmem(struct drm_device *dev, 4736 struct drm_gem_object *obj, 4737 size_t size) 4738 { 4739 struct drm_i915_private *i915 = to_i915(dev); 4740 unsigned long flags = VM_NORESERVE; 4741 struct file *filp; 4742 4743 drm_gem_private_object_init(dev, obj, size); 4744 4745 if (i915->mm.gemfs) 4746 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4747 flags); 4748 else 4749 filp = shmem_file_setup("i915", size, flags); 4750 4751 if (IS_ERR(filp)) 4752 return PTR_ERR(filp); 4753 4754 obj->filp = filp; 4755 4756 return 0; 4757 } 4758 4759 struct drm_i915_gem_object * 4760 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4761 { 4762 struct drm_i915_gem_object *obj; 4763 struct address_space *mapping; 4764 unsigned int cache_level; 4765 gfp_t mask; 4766 int ret; 4767 4768 /* There is a prevalence of the assumption that we fit the object's 4769 * page count inside a 32bit _signed_ variable. Let's document this and 4770 * catch if we ever need to fix it. In the meantime, if you do spot 4771 * such a local variable, please consider fixing! 4772 */ 4773 if (size >> PAGE_SHIFT > INT_MAX) 4774 return ERR_PTR(-E2BIG); 4775 4776 if (overflows_type(size, obj->base.size)) 4777 return ERR_PTR(-E2BIG); 4778 4779 obj = i915_gem_object_alloc(dev_priv); 4780 if (obj == NULL) 4781 return ERR_PTR(-ENOMEM); 4782 4783 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4784 if (ret) 4785 goto fail; 4786 4787 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4788 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4789 /* 965gm cannot relocate objects above 4GiB. */ 4790 mask &= ~__GFP_HIGHMEM; 4791 mask |= __GFP_DMA32; 4792 } 4793 4794 mapping = obj->base.filp->f_mapping; 4795 mapping_set_gfp_mask(mapping, mask); 4796 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4797 4798 i915_gem_object_init(obj, &i915_gem_object_ops); 4799 4800 obj->write_domain = I915_GEM_DOMAIN_CPU; 4801 obj->read_domains = I915_GEM_DOMAIN_CPU; 4802 4803 if (HAS_LLC(dev_priv)) 4804 /* On some devices, we can have the GPU use the LLC (the CPU 4805 * cache) for about a 10% performance improvement 4806 * compared to uncached. Graphics requests other than 4807 * display scanout are coherent with the CPU in 4808 * accessing this cache. This means in this mode we 4809 * don't need to clflush on the CPU side, and on the 4810 * GPU side we only need to flush internal caches to 4811 * get data visible to the CPU. 4812 * 4813 * However, we maintain the display planes as UC, and so 4814 * need to rebind when first used as such. 4815 */ 4816 cache_level = I915_CACHE_LLC; 4817 else 4818 cache_level = I915_CACHE_NONE; 4819 4820 i915_gem_object_set_cache_coherency(obj, cache_level); 4821 4822 trace_i915_gem_object_create(obj); 4823 4824 return obj; 4825 4826 fail: 4827 i915_gem_object_free(obj); 4828 return ERR_PTR(ret); 4829 } 4830 4831 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4832 { 4833 /* If we are the last user of the backing storage (be it shmemfs 4834 * pages or stolen etc), we know that the pages are going to be 4835 * immediately released. In this case, we can then skip copying 4836 * back the contents from the GPU. 
4837 */ 4838 4839 if (obj->mm.madv != I915_MADV_WILLNEED) 4840 return false; 4841 4842 if (obj->base.filp == NULL) 4843 return true; 4844 4845 /* At first glance, this looks racy, but then again so would be 4846 * userspace racing mmap against close. However, the first external 4847 * reference to the filp can only be obtained through the 4848 * i915_gem_mmap_ioctl() which safeguards us against the user 4849 * acquiring such a reference whilst we are in the middle of 4850 * freeing the object. 4851 */ 4852 return atomic_long_read(&obj->base.filp->f_count) == 1; 4853 } 4854 4855 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4856 struct llist_node *freed) 4857 { 4858 struct drm_i915_gem_object *obj, *on; 4859 4860 intel_runtime_pm_get(i915); 4861 llist_for_each_entry_safe(obj, on, freed, freed) { 4862 struct i915_vma *vma, *vn; 4863 4864 trace_i915_gem_object_destroy(obj); 4865 4866 mutex_lock(&i915->drm.struct_mutex); 4867 4868 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4869 list_for_each_entry_safe(vma, vn, 4870 &obj->vma_list, obj_link) { 4871 GEM_BUG_ON(i915_vma_is_active(vma)); 4872 vma->flags &= ~I915_VMA_PIN_MASK; 4873 i915_vma_destroy(vma); 4874 } 4875 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4876 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4877 4878 /* This serializes freeing with the shrinker. Since the free 4879 * is delayed, first by RCU then by the workqueue, we want the 4880 * shrinker to be able to free pages of unreferenced objects, 4881 * or else we may oom whilst there are plenty of deferred 4882 * freed objects. 4883 */ 4884 if (i915_gem_object_has_pages(obj)) { 4885 spin_lock(&i915->mm.obj_lock); 4886 list_del_init(&obj->mm.link); 4887 spin_unlock(&i915->mm.obj_lock); 4888 } 4889 4890 mutex_unlock(&i915->drm.struct_mutex); 4891 4892 GEM_BUG_ON(obj->bind_count); 4893 GEM_BUG_ON(obj->userfault_count); 4894 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4895 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4896 4897 if (obj->ops->release) 4898 obj->ops->release(obj); 4899 4900 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4901 atomic_set(&obj->mm.pages_pin_count, 0); 4902 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4903 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4904 4905 if (obj->base.import_attach) 4906 drm_prime_gem_destroy(&obj->base, NULL); 4907 4908 reservation_object_fini(&obj->__builtin_resv); 4909 drm_gem_object_release(&obj->base); 4910 i915_gem_info_remove_obj(i915, obj->base.size); 4911 4912 kfree(obj->bit_17); 4913 i915_gem_object_free(obj); 4914 4915 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4916 atomic_dec(&i915->mm.free_count); 4917 4918 if (on) 4919 cond_resched(); 4920 } 4921 intel_runtime_pm_put(i915); 4922 } 4923 4924 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4925 { 4926 struct llist_node *freed; 4927 4928 /* Free the oldest, most stale object to keep the free_list short */ 4929 freed = NULL; 4930 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4931 /* Only one consumer of llist_del_first() allowed */ 4932 spin_lock(&i915->mm.free_lock); 4933 freed = llist_del_first(&i915->mm.free_list); 4934 spin_unlock(&i915->mm.free_lock); 4935 } 4936 if (unlikely(freed)) { 4937 freed->next = NULL; 4938 __i915_gem_free_objects(i915, freed); 4939 } 4940 } 4941 4942 static void __i915_gem_free_work(struct work_struct *work) 4943 { 4944 struct drm_i915_private *i915 = 4945 container_of(work, struct drm_i915_private, mm.free_work); 4946 struct llist_node *freed; 4947 4948 /* 4949 * All 
file-owned VMA should have been released by this point through 4950 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4951 * However, the object may also be bound into the global GTT (e.g. 4952 * older GPUs without per-process support, or for direct access through 4953 * the GTT either for the user or for scanout). Those VMA still need to 4954 * be unbound now. 4955 */ 4956 4957 spin_lock(&i915->mm.free_lock); 4958 while ((freed = llist_del_all(&i915->mm.free_list))) { 4959 spin_unlock(&i915->mm.free_lock); 4960 4961 __i915_gem_free_objects(i915, freed); 4962 if (need_resched()) 4963 return; 4964 4965 spin_lock(&i915->mm.free_lock); 4966 } 4967 spin_unlock(&i915->mm.free_lock); 4968 } 4969 4970 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4971 { 4972 struct drm_i915_gem_object *obj = 4973 container_of(head, typeof(*obj), rcu); 4974 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4975 4976 /* 4977 * We reuse obj->rcu for the freed list, so we had better not treat 4978 * it like an rcu_head from this point forwards. And we expect all 4979 * objects to be freed via this path. 4980 */ 4981 destroy_rcu_head(&obj->rcu); 4982 4983 /* 4984 * Since we require blocking on struct_mutex to unbind the freed 4985 * object from the GPU before releasing resources back to the 4986 * system, we can not do that directly from the RCU callback (which may 4987 * be a softirq context), but must instead defer that work onto a 4988 * kthread. We use the RCU callback rather than move the freed object 4989 * directly onto the work queue so that we can mix between using the 4990 * worker and performing frees directly from subsequent allocations for 4991 * crude but effective memory throttling. 4992 */ 4993 if (llist_add(&obj->freed, &i915->mm.free_list)) 4994 queue_work(i915->wq, &i915->mm.free_work); 4995 } 4996 4997 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4998 { 4999 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 5000 5001 if (obj->mm.quirked) 5002 __i915_gem_object_unpin_pages(obj); 5003 5004 if (discard_backing_storage(obj)) 5005 obj->mm.madv = I915_MADV_DONTNEED; 5006 5007 /* 5008 * Before we free the object, make sure any pure RCU-only 5009 * read-side critical sections are complete, e.g. 5010 * i915_gem_busy_ioctl(). For the corresponding synchronized 5011 * lookup see i915_gem_object_lookup_rcu(). 5012 */ 5013 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5014 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5015 } 5016 5017 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5018 { 5019 lockdep_assert_held(&obj->base.dev->struct_mutex); 5020 5021 if (!i915_gem_object_has_active_reference(obj) && 5022 i915_gem_object_is_active(obj)) 5023 i915_gem_object_set_active_reference(obj); 5024 else 5025 i915_gem_object_put(obj); 5026 } 5027 5028 void i915_gem_sanitize(struct drm_i915_private *i915) 5029 { 5030 int err; 5031 5032 GEM_TRACE("\n"); 5033 5034 mutex_lock(&i915->drm.struct_mutex); 5035 5036 intel_runtime_pm_get(i915); 5037 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5038 5039 /* 5040 * As we have just resumed the machine and woken the device up from 5041 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5042 * back to defaults, recovering from whatever wedged state we left it 5043 * in, and so it is worth trying to use the device once more.
5044 */ 5045 if (i915_terminally_wedged(&i915->gpu_error)) 5046 i915_gem_unset_wedged(i915); 5047 5048 /* 5049 * If we inherit context state from the BIOS or earlier occupants 5050 * of the GPU, the GPU may be in an inconsistent state when we 5051 * try to take over. The only way to remove the earlier state 5052 * is by resetting. However, resetting on earlier gen is tricky as 5053 * it may impact the display and we are uncertain about the stability 5054 * of the reset, so for now we only attempt it on gen5+, although in principle it could be applied to even earlier gens. 5055 */ 5056 err = -ENODEV; 5057 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5058 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5059 if (!err) 5060 intel_engines_sanitize(i915); 5061 5062 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5063 intel_runtime_pm_put(i915); 5064 5065 i915_gem_contexts_lost(i915); 5066 mutex_unlock(&i915->drm.struct_mutex); 5067 } 5068 5069 int i915_gem_suspend(struct drm_i915_private *i915) 5070 { 5071 int ret; 5072 5073 GEM_TRACE("\n"); 5074 5075 intel_runtime_pm_get(i915); 5076 intel_suspend_gt_powersave(i915); 5077 5078 mutex_lock(&i915->drm.struct_mutex); 5079 5080 /* 5081 * We have to flush all the executing contexts to main memory so 5082 * that they can be saved in the hibernation image. To ensure the last 5083 * context image is coherent, we have to switch away from it. That 5084 * leaves the i915->kernel_context still active when 5085 * we actually suspend, and its image in memory may not match the GPU 5086 * state. Fortunately, the kernel_context is disposable and we do 5087 * not rely on its state. 5088 */ 5089 if (!i915_terminally_wedged(&i915->gpu_error)) { 5090 ret = i915_gem_switch_to_kernel_context(i915); 5091 if (ret) 5092 goto err_unlock; 5093 5094 ret = i915_gem_wait_for_idle(i915, 5095 I915_WAIT_INTERRUPTIBLE | 5096 I915_WAIT_LOCKED | 5097 I915_WAIT_FOR_IDLE_BOOST, 5098 MAX_SCHEDULE_TIMEOUT); 5099 if (ret && ret != -EIO) 5100 goto err_unlock; 5101 5102 assert_kernel_context_is_current(i915); 5103 } 5104 i915_retire_requests(i915); /* ensure we flush after wedging */ 5105 5106 mutex_unlock(&i915->drm.struct_mutex); 5107 5108 intel_uc_suspend(i915); 5109 5110 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5111 cancel_delayed_work_sync(&i915->gt.retire_work); 5112 5113 /* 5114 * As the idle_work rearms itself if it detects a race, play safe and 5115 * repeat the flush until it is definitely idle. 5116 */ 5117 drain_delayed_work(&i915->gt.idle_work); 5118 5119 /* 5120 * Assert that we successfully flushed all the work and 5121 * reset the GPU back to its idle, low power state. 5122 */ 5123 WARN_ON(i915->gt.awake); 5124 if (WARN_ON(!intel_engines_are_idle(i915))) 5125 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5126 5127 intel_runtime_pm_put(i915); 5128 return 0; 5129 5130 err_unlock: 5131 mutex_unlock(&i915->drm.struct_mutex); 5132 intel_runtime_pm_put(i915); 5133 return ret; 5134 } 5135 5136 void i915_gem_suspend_late(struct drm_i915_private *i915) 5137 { 5138 struct drm_i915_gem_object *obj; 5139 struct list_head *phases[] = { 5140 &i915->mm.unbound_list, 5141 &i915->mm.bound_list, 5142 NULL 5143 }, **phase; 5144 5145 /* 5146 * Neither the BIOS, ourselves nor any other kernel 5147 * expects the system to be in execlists mode on startup, 5148 * so we need to reset the GPU back to legacy mode. And the only 5149 * known way to disable logical contexts is through a GPU reset.
5150 * 5151 * So in order to leave the system in a known default configuration, 5152 * always reset the GPU upon unload and suspend. Afterwards we 5153 * clean up the GEM state tracking, flushing off the requests and 5154 * leaving the system in a known idle state. 5155 * 5156 * Note that it is of the utmost importance that the GPU is idle and 5157 * all stray writes are flushed *before* we dismantle the backing 5158 * storage for the pinned objects. 5159 * 5160 * However, since we are uncertain that resetting the GPU on older 5161 * machines is a good idea, we don't - just in case it leaves the 5162 * machine in an unusable condition. 5163 */ 5164 5165 mutex_lock(&i915->drm.struct_mutex); 5166 for (phase = phases; *phase; phase++) { 5167 list_for_each_entry(obj, *phase, mm.link) 5168 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5169 } 5170 mutex_unlock(&i915->drm.struct_mutex); 5171 5172 intel_uc_sanitize(i915); 5173 i915_gem_sanitize(i915); 5174 } 5175 5176 void i915_gem_resume(struct drm_i915_private *i915) 5177 { 5178 GEM_TRACE("\n"); 5179 5180 WARN_ON(i915->gt.awake); 5181 5182 mutex_lock(&i915->drm.struct_mutex); 5183 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5184 5185 i915_gem_restore_gtt_mappings(i915); 5186 i915_gem_restore_fences(i915); 5187 5188 /* 5189 * As we didn't flush the kernel context before suspend, we cannot 5190 * guarantee that the context image is complete. So let's just reset 5191 * it and start again. 5192 */ 5193 i915->gt.resume(i915); 5194 5195 if (i915_gem_init_hw(i915)) 5196 goto err_wedged; 5197 5198 intel_uc_resume(i915); 5199 5200 /* Always reload a context for powersaving. */ 5201 if (i915_gem_switch_to_kernel_context(i915)) 5202 goto err_wedged; 5203 5204 out_unlock: 5205 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5206 mutex_unlock(&i915->drm.struct_mutex); 5207 return; 5208 5209 err_wedged: 5210 if (!i915_terminally_wedged(&i915->gpu_error)) { 5211 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5212 i915_gem_set_wedged(i915); 5213 } 5214 goto out_unlock; 5215 } 5216 5217 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5218 { 5219 if (INTEL_GEN(dev_priv) < 5 || 5220 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5221 return; 5222 5223 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5224 DISP_TILE_SURFACE_SWIZZLING); 5225 5226 if (IS_GEN5(dev_priv)) 5227 return; 5228 5229 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5230 if (IS_GEN6(dev_priv)) 5231 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5232 else if (IS_GEN7(dev_priv)) 5233 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5234 else if (IS_GEN8(dev_priv)) 5235 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5236 else 5237 BUG(); 5238 } 5239 5240 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5241 { 5242 I915_WRITE(RING_CTL(base), 0); 5243 I915_WRITE(RING_HEAD(base), 0); 5244 I915_WRITE(RING_TAIL(base), 0); 5245 I915_WRITE(RING_START(base), 0); 5246 } 5247 5248 static void init_unused_rings(struct drm_i915_private *dev_priv) 5249 { 5250 if (IS_I830(dev_priv)) { 5251 init_unused_ring(dev_priv, PRB1_BASE); 5252 init_unused_ring(dev_priv, SRB0_BASE); 5253 init_unused_ring(dev_priv, SRB1_BASE); 5254 init_unused_ring(dev_priv, SRB2_BASE); 5255 init_unused_ring(dev_priv, SRB3_BASE); 5256 } else if (IS_GEN2(dev_priv)) { 5257 init_unused_ring(dev_priv, SRB0_BASE); 5258 init_unused_ring(dev_priv, SRB1_BASE); 5259 } else if (IS_GEN3(dev_priv)) { 5260
init_unused_ring(dev_priv, PRB1_BASE); 5261 init_unused_ring(dev_priv, PRB2_BASE); 5262 } 5263 } 5264 5265 static int __i915_gem_restart_engines(void *data) 5266 { 5267 struct drm_i915_private *i915 = data; 5268 struct intel_engine_cs *engine; 5269 enum intel_engine_id id; 5270 int err; 5271 5272 for_each_engine(engine, i915, id) { 5273 err = engine->init_hw(engine); 5274 if (err) { 5275 DRM_ERROR("Failed to restart %s (%d)\n", 5276 engine->name, err); 5277 return err; 5278 } 5279 } 5280 5281 return 0; 5282 } 5283 5284 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5285 { 5286 int ret; 5287 5288 dev_priv->gt.last_init_time = ktime_get(); 5289 5290 /* Double layer security blanket, see i915_gem_init() */ 5291 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5292 5293 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5294 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5295 5296 if (IS_HASWELL(dev_priv)) 5297 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5298 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5299 5300 /* Apply the GT workarounds... */ 5301 intel_gt_apply_workarounds(dev_priv); 5302 /* ...and determine whether they are sticking. */ 5303 intel_gt_verify_workarounds(dev_priv, "init"); 5304 5305 i915_gem_init_swizzling(dev_priv); 5306 5307 /* 5308 * At least 830 can leave some of the unused rings 5309 * "active" (i.e. head != tail) after resume, which 5310 * will prevent c3 entry. Make sure all unused rings 5311 * are totally idle. 5312 */ 5313 init_unused_rings(dev_priv); 5314 5315 BUG_ON(!dev_priv->kernel_context); 5316 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5317 ret = -EIO; 5318 goto out; 5319 } 5320 5321 ret = i915_ppgtt_init_hw(dev_priv); 5322 if (ret) { 5323 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5324 goto out; 5325 } 5326 5327 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5328 if (ret) { 5329 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5330 goto out; 5331 } 5332 5333 /* We can't enable contexts until all firmware is loaded */ 5334 ret = intel_uc_init_hw(dev_priv); 5335 if (ret) { 5336 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5337 goto out; 5338 } 5339 5340 intel_mocs_init_l3cc_table(dev_priv); 5341 5342 /* Only when the HW is re-initialised can we replay the requests */ 5343 ret = __i915_gem_restart_engines(dev_priv); 5344 if (ret) 5345 goto cleanup_uc; 5346 5347 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5348 5349 return 0; 5350 5351 cleanup_uc: 5352 intel_uc_fini_hw(dev_priv); 5353 out: 5354 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5355 5356 return ret; 5357 } 5358 5359 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5360 { 5361 struct i915_gem_context *ctx; 5362 struct intel_engine_cs *engine; 5363 enum intel_engine_id id; 5364 int err; 5365 5366 /* 5367 * As we reset the gpu during very early sanitisation, the current 5368 * register state on the GPU should reflect its default values. 5369 * We load a context onto the hw (with restore-inhibit), then switch 5370 * over to a second context to save that default register state. We 5371 * can then prime every new context with that state so they all start 5372 * from the same default HW values.
5373 */ 5374 5375 ctx = i915_gem_context_create_kernel(i915, 0); 5376 if (IS_ERR(ctx)) 5377 return PTR_ERR(ctx); 5378 5379 for_each_engine(engine, i915, id) { 5380 struct i915_request *rq; 5381 5382 rq = i915_request_alloc(engine, ctx); 5383 if (IS_ERR(rq)) { 5384 err = PTR_ERR(rq); 5385 goto out_ctx; 5386 } 5387 5388 err = 0; 5389 if (engine->init_context) 5390 err = engine->init_context(rq); 5391 5392 i915_request_add(rq); 5393 if (err) 5394 goto err_active; 5395 } 5396 5397 err = i915_gem_switch_to_kernel_context(i915); 5398 if (err) 5399 goto err_active; 5400 5401 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5402 i915_gem_set_wedged(i915); 5403 err = -EIO; /* Caller will declare us wedged */ 5404 goto err_active; 5405 } 5406 5407 assert_kernel_context_is_current(i915); 5408 5409 /* 5410 * Immediately park the GPU so that we enable powersaving and 5411 * treat it as idle. The next time we issue a request, we will 5412 * unpark and start using the engine->pinned_default_state, otherwise 5413 * it is in limbo and an early reset may fail. 5414 */ 5415 __i915_gem_park(i915); 5416 5417 for_each_engine(engine, i915, id) { 5418 struct i915_vma *state; 5419 void *vaddr; 5420 5421 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 5422 5423 state = to_intel_context(ctx, engine)->state; 5424 if (!state) 5425 continue; 5426 5427 /* 5428 * As we will hold a reference to the logical state, it will 5429 * not be torn down with the context, and importantly the 5430 * object will hold onto its vma (making it possible for a 5431 * stray GTT write to corrupt our defaults). Unmap the vma 5432 * from the GTT to prevent such accidents and reclaim the 5433 * space. 5434 */ 5435 err = i915_vma_unbind(state); 5436 if (err) 5437 goto err_active; 5438 5439 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5440 if (err) 5441 goto err_active; 5442 5443 engine->default_state = i915_gem_object_get(state->obj); 5444 5445 /* Check we can acquire the image of the context state */ 5446 vaddr = i915_gem_object_pin_map(engine->default_state, 5447 I915_MAP_FORCE_WB); 5448 if (IS_ERR(vaddr)) { 5449 err = PTR_ERR(vaddr); 5450 goto err_active; 5451 } 5452 5453 i915_gem_object_unpin_map(engine->default_state); 5454 } 5455 5456 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5457 unsigned int found = intel_engines_has_context_isolation(i915); 5458 5459 /* 5460 * Make sure that classes with multiple engine instances all 5461 * share the same basic configuration. 5462 */ 5463 for_each_engine(engine, i915, id) { 5464 unsigned int bit = BIT(engine->uabi_class); 5465 unsigned int expected = engine->default_state ? bit : 0; 5466 5467 if ((found & bit) != expected) { 5468 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5469 engine->uabi_class, engine->name); 5470 } 5471 } 5472 } 5473 5474 out_ctx: 5475 i915_gem_context_set_closed(ctx); 5476 i915_gem_context_put(ctx); 5477 return err; 5478 5479 err_active: 5480 /* 5481 * If we have to abandon now, we expect the engines to be idle 5482 * and ready to be torn-down. First try to flush any remaining 5483 * request, ensure we are pointing at the kernel context and 5484 * then remove it. 
5485 */ 5486 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5487 goto out_ctx; 5488 5489 if (WARN_ON(i915_gem_wait_for_idle(i915, 5490 I915_WAIT_LOCKED, 5491 MAX_SCHEDULE_TIMEOUT))) 5492 goto out_ctx; 5493 5494 i915_gem_contexts_lost(i915); 5495 goto out_ctx; 5496 } 5497 5498 static int 5499 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5500 { 5501 struct drm_i915_gem_object *obj; 5502 struct i915_vma *vma; 5503 int ret; 5504 5505 obj = i915_gem_object_create_stolen(i915, size); 5506 if (!obj) 5507 obj = i915_gem_object_create_internal(i915, size); 5508 if (IS_ERR(obj)) { 5509 DRM_ERROR("Failed to allocate scratch page\n"); 5510 return PTR_ERR(obj); 5511 } 5512 5513 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5514 if (IS_ERR(vma)) { 5515 ret = PTR_ERR(vma); 5516 goto err_unref; 5517 } 5518 5519 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5520 if (ret) 5521 goto err_unref; 5522 5523 i915->gt.scratch = vma; 5524 return 0; 5525 5526 err_unref: 5527 i915_gem_object_put(obj); 5528 return ret; 5529 } 5530 5531 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5532 { 5533 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5534 } 5535 5536 int i915_gem_init(struct drm_i915_private *dev_priv) 5537 { 5538 int ret; 5539 5540 /* We need to fall back to 4K pages if the host doesn't support huge gtt. */ 5541 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5542 mkwrite_device_info(dev_priv)->page_sizes = 5543 I915_GTT_PAGE_SIZE_4K; 5544 5545 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5546 5547 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5548 dev_priv->gt.resume = intel_lr_context_resume; 5549 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5550 } else { 5551 dev_priv->gt.resume = intel_legacy_submission_resume; 5552 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5553 } 5554 5555 ret = i915_gem_init_userptr(dev_priv); 5556 if (ret) 5557 return ret; 5558 5559 ret = intel_uc_init_misc(dev_priv); 5560 if (ret) 5561 return ret; 5562 5563 ret = intel_wopcm_init(&dev_priv->wopcm); 5564 if (ret) 5565 goto err_uc_misc; 5566 5567 /* This is just a security blanket to placate dragons. 5568 * On some systems, we very sporadically observe that the first TLBs 5569 * used by the CS may be stale, despite us poking the TLB reset. If 5570 * we hold the forcewake during initialisation these problems 5571 * just magically go away. 5572 */ 5573 mutex_lock(&dev_priv->drm.struct_mutex); 5574 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5575 5576 ret = i915_gem_init_ggtt(dev_priv); 5577 if (ret) { 5578 GEM_BUG_ON(ret == -EIO); 5579 goto err_unlock; 5580 } 5581 5582 ret = i915_gem_init_scratch(dev_priv, 5583 IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); 5584 if (ret) { 5585 GEM_BUG_ON(ret == -EIO); 5586 goto err_ggtt; 5587 } 5588 5589 ret = i915_gem_contexts_init(dev_priv); 5590 if (ret) { 5591 GEM_BUG_ON(ret == -EIO); 5592 goto err_scratch; 5593 } 5594 5595 ret = intel_engines_init(dev_priv); 5596 if (ret) { 5597 GEM_BUG_ON(ret == -EIO); 5598 goto err_context; 5599 } 5600 5601 intel_init_gt_powersave(dev_priv); 5602 5603 ret = intel_uc_init(dev_priv); 5604 if (ret) 5605 goto err_pm; 5606 5607 ret = i915_gem_init_hw(dev_priv); 5608 if (ret) 5609 goto err_uc_init; 5610 5611 /* 5612 * Despite its name, intel_init_clock_gating applies display 5613 * clock gating workarounds, GT mmio workarounds and the occasional 5614 * GT power context workaround.
Worse, sometimes it includes a context 5615 * register workaround which we need to apply before we record the 5616 * default HW state for all contexts. 5617 * 5618 * FIXME: break up the workarounds and apply them at the right time! 5619 */ 5620 intel_init_clock_gating(dev_priv); 5621 5622 ret = __intel_engines_record_defaults(dev_priv); 5623 if (ret) 5624 goto err_init_hw; 5625 5626 if (i915_inject_load_failure()) { 5627 ret = -ENODEV; 5628 goto err_init_hw; 5629 } 5630 5631 if (i915_inject_load_failure()) { 5632 ret = -EIO; 5633 goto err_init_hw; 5634 } 5635 5636 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5637 mutex_unlock(&dev_priv->drm.struct_mutex); 5638 5639 return 0; 5640 5641 /* 5642 * Unwinding is complicated by the fact that we want to handle -EIO to mean 5643 * disable GPU submission but keep KMS alive. We want to mark the 5644 * HW as irreversibly wedged, but keep enough state around that the 5645 * driver doesn't explode during runtime. 5646 */ 5647 err_init_hw: 5648 mutex_unlock(&dev_priv->drm.struct_mutex); 5649 5650 WARN_ON(i915_gem_suspend(dev_priv)); 5651 i915_gem_suspend_late(dev_priv); 5652 5653 i915_gem_drain_workqueue(dev_priv); 5654 5655 mutex_lock(&dev_priv->drm.struct_mutex); 5656 intel_uc_fini_hw(dev_priv); 5657 err_uc_init: 5658 intel_uc_fini(dev_priv); 5659 err_pm: 5660 if (ret != -EIO) { 5661 intel_cleanup_gt_powersave(dev_priv); 5662 i915_gem_cleanup_engines(dev_priv); 5663 } 5664 err_context: 5665 if (ret != -EIO) 5666 i915_gem_contexts_fini(dev_priv); 5667 err_scratch: 5668 i915_gem_fini_scratch(dev_priv); 5669 err_ggtt: 5670 err_unlock: 5671 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5672 mutex_unlock(&dev_priv->drm.struct_mutex); 5673 5674 err_uc_misc: 5675 intel_uc_fini_misc(dev_priv); 5676 5677 if (ret != -EIO) 5678 i915_gem_cleanup_userptr(dev_priv); 5679 5680 if (ret == -EIO) { 5681 mutex_lock(&dev_priv->drm.struct_mutex); 5682 5683 /* 5684 * Allow engine initialisation to fail by marking the GPU as 5685 * wedged. But we only want to do this where the GPU is angry; 5686 * for any other failure, such as an allocation failure, bail. 5687 */ 5688 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5689 i915_load_error(dev_priv, 5690 "Failed to initialize GPU, declaring it wedged!\n"); 5691 i915_gem_set_wedged(dev_priv); 5692 } 5693 5694 /* Minimal basic recovery for KMS */ 5695 ret = i915_ggtt_enable_hw(dev_priv); 5696 i915_gem_restore_gtt_mappings(dev_priv); 5697 i915_gem_restore_fences(dev_priv); 5698 intel_init_clock_gating(dev_priv); 5699 5700 mutex_unlock(&dev_priv->drm.struct_mutex); 5701 } 5702 5703 i915_gem_drain_freed_objects(dev_priv); 5704 return ret; 5705 } 5706 5707 void i915_gem_fini(struct drm_i915_private *dev_priv) 5708 { 5709 i915_gem_suspend_late(dev_priv); 5710 intel_disable_gt_powersave(dev_priv); 5711 5712 /* Flush any outstanding unpin_work.
*/ 5713 i915_gem_drain_workqueue(dev_priv); 5714 5715 mutex_lock(&dev_priv->drm.struct_mutex); 5716 intel_uc_fini_hw(dev_priv); 5717 intel_uc_fini(dev_priv); 5718 i915_gem_cleanup_engines(dev_priv); 5719 i915_gem_contexts_fini(dev_priv); 5720 i915_gem_fini_scratch(dev_priv); 5721 mutex_unlock(&dev_priv->drm.struct_mutex); 5722 5723 intel_wa_list_free(&dev_priv->gt_wa_list); 5724 5725 intel_cleanup_gt_powersave(dev_priv); 5726 5727 intel_uc_fini_misc(dev_priv); 5728 i915_gem_cleanup_userptr(dev_priv); 5729 5730 i915_gem_drain_freed_objects(dev_priv); 5731 5732 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5733 } 5734 5735 void i915_gem_init_mmio(struct drm_i915_private *i915) 5736 { 5737 i915_gem_sanitize(i915); 5738 } 5739 5740 void 5741 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5742 { 5743 struct intel_engine_cs *engine; 5744 enum intel_engine_id id; 5745 5746 for_each_engine(engine, dev_priv, id) 5747 dev_priv->gt.cleanup_engine(engine); 5748 } 5749 5750 void 5751 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5752 { 5753 int i; 5754 5755 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5756 !IS_CHERRYVIEW(dev_priv)) 5757 dev_priv->num_fence_regs = 32; 5758 else if (INTEL_GEN(dev_priv) >= 4 || 5759 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5760 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5761 dev_priv->num_fence_regs = 16; 5762 else 5763 dev_priv->num_fence_regs = 8; 5764 5765 if (intel_vgpu_active(dev_priv)) 5766 dev_priv->num_fence_regs = 5767 I915_READ(vgtif_reg(avail_rs.fence_num)); 5768 5769 /* Initialize fence registers to zero */ 5770 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5771 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5772 5773 fence->i915 = dev_priv; 5774 fence->id = i; 5775 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5776 } 5777 i915_gem_restore_fences(dev_priv); 5778 5779 i915_gem_detect_bit_6_swizzle(dev_priv); 5780 } 5781 5782 static void i915_gem_init__mm(struct drm_i915_private *i915) 5783 { 5784 spin_lock_init(&i915->mm.object_stat_lock); 5785 spin_lock_init(&i915->mm.obj_lock); 5786 spin_lock_init(&i915->mm.free_lock); 5787 5788 init_llist_head(&i915->mm.free_list); 5789 5790 INIT_LIST_HEAD(&i915->mm.unbound_list); 5791 INIT_LIST_HEAD(&i915->mm.bound_list); 5792 INIT_LIST_HEAD(&i915->mm.fence_list); 5793 INIT_LIST_HEAD(&i915->mm.userfault_list); 5794 5795 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5796 } 5797 5798 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5799 { 5800 int err = -ENOMEM; 5801 5802 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5803 if (!dev_priv->objects) 5804 goto err_out; 5805 5806 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5807 if (!dev_priv->vmas) 5808 goto err_objects; 5809 5810 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5811 if (!dev_priv->luts) 5812 goto err_vmas; 5813 5814 dev_priv->requests = KMEM_CACHE(i915_request, 5815 SLAB_HWCACHE_ALIGN | 5816 SLAB_RECLAIM_ACCOUNT | 5817 SLAB_TYPESAFE_BY_RCU); 5818 if (!dev_priv->requests) 5819 goto err_luts; 5820 5821 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5822 SLAB_HWCACHE_ALIGN | 5823 SLAB_RECLAIM_ACCOUNT); 5824 if (!dev_priv->dependencies) 5825 goto err_requests; 5826 5827 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5828 if (!dev_priv->priorities) 5829 goto err_dependencies; 5830 5831 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5832 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5833 
int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	dev_priv->requests = KMEM_CACHE(i915_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;

	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

	return 0;

err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	/* And ensure that our TYPESAFE_BY_RCU slabs are truly destroyed */
	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}
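/*
 * Hibernation is a two-step process for GEM: i915_gem_freeze() above
 * discards everything purgeable so it never reaches the image, and
 * i915_gem_freeze_late() below moves whatever remains into the CPU domain
 * just before the image is written out.
 */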
int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try to reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}
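/*
 * Per-client (struct drm_file) bookkeeping: i915_gem_open() allocates the
 * file_priv when a client opens the device node, and i915_gem_release()
 * detaches any requests still pointing at it on close, so that a later
 * retire cannot chase a dangling file_priv pointer.
 */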
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
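/*
 * A hedged usage sketch for the helper below; the identifiers "blob" and
 * "len" are illustrative only and do not come from this file:
 *
 *	obj = i915_gem_object_create_from_data(i915, blob, len);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The allocation is rounded up to whole pages and the returned object starts
 * out in the CPU write domain with a copy of the supplied data.
 */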
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookup of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
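/*
 * i915_gem_object_attach_phys() swaps a default shmem-backed object over to
 * the physical backing store (i915_gem_phys_ops). It rejects objects whose
 * pages are currently mapped, not marked WILLNEED, or quirked, and on
 * failure it reinstates the original page set so the object remains usable.
 */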
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif