1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "intel_workarounds.h" 39 #include "i915_gemfs.h" 40 #include <linux/dma-fence-array.h> 41 #include <linux/kthread.h> 42 #include <linux/reservation.h> 43 #include <linux/shmem_fs.h> 44 #include <linux/slab.h> 45 #include <linux/stop_machine.h> 46 #include <linux/swap.h> 47 #include <linux/pci.h> 48 #include <linux/dma-buf.h> 49 50 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 51 52 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 53 { 54 if (obj->cache_dirty) 55 return false; 56 57 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 58 return true; 59 60 return obj->pin_global; /* currently in use by HW, keep flushed */ 61 } 62 63 static int 64 insert_mappable_node(struct i915_ggtt *ggtt, 65 struct drm_mm_node *node, u32 size) 66 { 67 memset(node, 0, sizeof(*node)); 68 return drm_mm_insert_node_in_range(&ggtt->base.mm, node, 69 size, 0, I915_COLOR_UNEVICTABLE, 70 0, ggtt->mappable_end, 71 DRM_MM_INSERT_LOW); 72 } 73 74 static void 75 remove_mappable_node(struct drm_mm_node *node) 76 { 77 drm_mm_remove_node(node); 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 u64 size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 u64 size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 might_sleep(); 105 106 /* 107 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 108 * userspace. 
If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 static u32 __i915_gem_park(struct drm_i915_private *i915) 141 { 142 lockdep_assert_held(&i915->drm.struct_mutex); 143 GEM_BUG_ON(i915->gt.active_requests); 144 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 145 146 if (!i915->gt.awake) 147 return I915_EPOCH_INVALID; 148 149 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 150 151 /* 152 * Be paranoid and flush a concurrent interrupt to make sure 153 * we don't reactivate any irq tasklets after parking. 154 * 155 * FIXME: Note that even though we have waited for execlists to be idle, 156 * there may still be an in-flight interrupt even though the CSB 157 * is now empty. synchronize_irq() makes sure that a residual interrupt 158 * is completed before we continue, but it doesn't prevent the HW from 159 * raising a spurious interrupt later. To complete the shield we should 160 * coordinate disabling the CS irq with flushing the interrupts. 161 */ 162 synchronize_irq(i915->drm.irq); 163 164 intel_engines_park(i915); 165 i915_timelines_park(i915); 166 167 i915_pmu_gt_parked(i915); 168 i915_vma_parked(i915); 169 170 i915->gt.awake = false; 171 172 if (INTEL_GEN(i915) >= 6) 173 gen6_rps_idle(i915); 174 175 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); 176 177 intel_runtime_pm_put(i915); 178 179 return i915->gt.epoch; 180 } 181 182 void i915_gem_park(struct drm_i915_private *i915) 183 { 184 lockdep_assert_held(&i915->drm.struct_mutex); 185 GEM_BUG_ON(i915->gt.active_requests); 186 187 if (!i915->gt.awake) 188 return; 189 190 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 191 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 192 } 193 194 void i915_gem_unpark(struct drm_i915_private *i915) 195 { 196 lockdep_assert_held(&i915->drm.struct_mutex); 197 GEM_BUG_ON(!i915->gt.active_requests); 198 199 if (i915->gt.awake) 200 return; 201 202 intel_runtime_pm_get_noresume(i915); 203 204 /* 205 * It seems that the DMC likes to transition between the DC states a lot 206 * when there are no connected displays (no active power domains) during 207 * command submission. 208 * 209 * This activity has negative impact on the performance of the chip with 210 * huge latencies observed in the interrupt handler and elsewhere. 211 * 212 * Work around it by grabbing a GT IRQ power domain whilst there is any 213 * GT activity, preventing any DC state transitions. 
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->base.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
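	 * (Worked example: a 12 KiB object is backed by a 16 KiB contiguous
	 * allocation aligned to 16 KiB, since both the size and alignment
	 * passed to drm_pci_alloc() below are roundup_pow_of_two(obj->base.size).)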
278 */ 279 phys = drm_pci_alloc(obj->base.dev, 280 roundup_pow_of_two(obj->base.size), 281 roundup_pow_of_two(obj->base.size)); 282 if (!phys) 283 return -ENOMEM; 284 285 vaddr = phys->vaddr; 286 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 287 struct page *page; 288 char *src; 289 290 page = shmem_read_mapping_page(mapping, i); 291 if (IS_ERR(page)) { 292 err = PTR_ERR(page); 293 goto err_phys; 294 } 295 296 src = kmap_atomic(page); 297 memcpy(vaddr, src, PAGE_SIZE); 298 drm_clflush_virt_range(vaddr, PAGE_SIZE); 299 kunmap_atomic(src); 300 301 put_page(page); 302 vaddr += PAGE_SIZE; 303 } 304 305 i915_gem_chipset_flush(to_i915(obj->base.dev)); 306 307 st = kmalloc(sizeof(*st), GFP_KERNEL); 308 if (!st) { 309 err = -ENOMEM; 310 goto err_phys; 311 } 312 313 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 314 kfree(st); 315 err = -ENOMEM; 316 goto err_phys; 317 } 318 319 sg = st->sgl; 320 sg->offset = 0; 321 sg->length = obj->base.size; 322 323 sg_dma_address(sg) = phys->busaddr; 324 sg_dma_len(sg) = obj->base.size; 325 326 obj->phys_handle = phys; 327 328 __i915_gem_object_set_pages(obj, st, sg->length); 329 330 return 0; 331 332 err_phys: 333 drm_pci_free(obj->base.dev, phys); 334 335 return err; 336 } 337 338 static void __start_cpu_write(struct drm_i915_gem_object *obj) 339 { 340 obj->read_domains = I915_GEM_DOMAIN_CPU; 341 obj->write_domain = I915_GEM_DOMAIN_CPU; 342 if (cpu_write_needs_clflush(obj)) 343 obj->cache_dirty = true; 344 } 345 346 static void 347 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 348 struct sg_table *pages, 349 bool needs_clflush) 350 { 351 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 352 353 if (obj->mm.madv == I915_MADV_DONTNEED) 354 obj->mm.dirty = false; 355 356 if (needs_clflush && 357 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 358 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 359 drm_clflush_sg(pages); 360 361 __start_cpu_write(obj); 362 } 363 364 static void 365 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 366 struct sg_table *pages) 367 { 368 __i915_gem_object_release_shmem(obj, pages, false); 369 370 if (obj->mm.dirty) { 371 struct address_space *mapping = obj->base.filp->f_mapping; 372 char *vaddr = obj->phys_handle->vaddr; 373 int i; 374 375 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 376 struct page *page; 377 char *dst; 378 379 page = shmem_read_mapping_page(mapping, i); 380 if (IS_ERR(page)) 381 continue; 382 383 dst = kmap_atomic(page); 384 drm_clflush_virt_range(vaddr, PAGE_SIZE); 385 memcpy(dst, vaddr, PAGE_SIZE); 386 kunmap_atomic(dst); 387 388 set_page_dirty(page); 389 if (obj->mm.madv == I915_MADV_WILLNEED) 390 mark_page_accessed(page); 391 put_page(page); 392 vaddr += PAGE_SIZE; 393 } 394 obj->mm.dirty = false; 395 } 396 397 sg_free_table(pages); 398 kfree(pages); 399 400 drm_pci_free(obj->base.dev, obj->phys_handle); 401 } 402 403 static void 404 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 405 { 406 i915_gem_object_unpin_pages(obj); 407 } 408 409 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 410 .get_pages = i915_gem_object_get_pages_phys, 411 .put_pages = i915_gem_object_put_pages_phys, 412 .release = i915_gem_object_release_phys, 413 }; 414 415 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 416 417 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 418 { 419 struct i915_vma *vma; 420 LIST_HEAD(still_in_list); 421 int ret; 422 423 lockdep_assert_held(&obj->base.dev->struct_mutex); 424 425 /* Closed vma are 
removed from the obj->vma_list - but they may 426 * still have an active binding on the object. To remove those we 427 * must wait for all rendering to complete to the object (as unbinding 428 * must anyway), and retire the requests. 429 */ 430 ret = i915_gem_object_set_to_cpu_domain(obj, false); 431 if (ret) 432 return ret; 433 434 while ((vma = list_first_entry_or_null(&obj->vma_list, 435 struct i915_vma, 436 obj_link))) { 437 list_move_tail(&vma->obj_link, &still_in_list); 438 ret = i915_vma_unbind(vma); 439 if (ret) 440 break; 441 } 442 list_splice(&still_in_list, &obj->vma_list); 443 444 return ret; 445 } 446 447 static long 448 i915_gem_object_wait_fence(struct dma_fence *fence, 449 unsigned int flags, 450 long timeout, 451 struct intel_rps_client *rps_client) 452 { 453 struct i915_request *rq; 454 455 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 456 457 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 458 return timeout; 459 460 if (!dma_fence_is_i915(fence)) 461 return dma_fence_wait_timeout(fence, 462 flags & I915_WAIT_INTERRUPTIBLE, 463 timeout); 464 465 rq = to_request(fence); 466 if (i915_request_completed(rq)) 467 goto out; 468 469 /* 470 * This client is about to stall waiting for the GPU. In many cases 471 * this is undesirable and limits the throughput of the system, as 472 * many clients cannot continue processing user input/output whilst 473 * blocked. RPS autotuning may take tens of milliseconds to respond 474 * to the GPU load and thus incurs additional latency for the client. 475 * We can circumvent that by promoting the GPU frequency to maximum 476 * before we wait. This makes the GPU throttle up much more quickly 477 * (good for benchmarks and user experience, e.g. window animations), 478 * but at a cost of spending more power processing the workload 479 * (bad for battery). Not all clients even want their results 480 * immediately and for them we should just let the GPU select its own 481 * frequency to maximise efficiency. To prevent a single client from 482 * forcing the clocks too high for the whole system, we only allow 483 * each client to waitboost once in a busy period. 484 */ 485 if (rps_client && !i915_request_started(rq)) { 486 if (INTEL_GEN(rq->i915) >= 6) 487 gen6_rps_boost(rq, rps_client); 488 } 489 490 timeout = i915_request_wait(rq, flags, timeout); 491 492 out: 493 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 494 i915_request_retire_upto(rq); 495 496 return timeout; 497 } 498 499 static long 500 i915_gem_object_wait_reservation(struct reservation_object *resv, 501 unsigned int flags, 502 long timeout, 503 struct intel_rps_client *rps_client) 504 { 505 unsigned int seq = __read_seqcount_begin(&resv->seq); 506 struct dma_fence *excl; 507 bool prune_fences = false; 508 509 if (flags & I915_WAIT_ALL) { 510 struct dma_fence **shared; 511 unsigned int count, i; 512 int ret; 513 514 ret = reservation_object_get_fences_rcu(resv, 515 &excl, &count, &shared); 516 if (ret) 517 return ret; 518 519 for (i = 0; i < count; i++) { 520 timeout = i915_gem_object_wait_fence(shared[i], 521 flags, timeout, 522 rps_client); 523 if (timeout < 0) 524 break; 525 526 dma_fence_put(shared[i]); 527 } 528 529 for (; i < count; i++) 530 dma_fence_put(shared[i]); 531 kfree(shared); 532 533 /* 534 * If both shared fences and an exclusive fence exist, 535 * then by construction the shared fences must be later 536 * than the exclusive fence. If we successfully wait for 537 * all the shared fences, we know that the exclusive fence 538 * must all be signaled. 
If all the shared fences are 539 * signaled, we can prune the array and recover the 540 * floating references on the fences/requests. 541 */ 542 prune_fences = count && timeout >= 0; 543 } else { 544 excl = reservation_object_get_excl_rcu(resv); 545 } 546 547 if (excl && timeout >= 0) 548 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 549 rps_client); 550 551 dma_fence_put(excl); 552 553 /* 554 * Opportunistically prune the fences iff we know they have *all* been 555 * signaled and that the reservation object has not been changed (i.e. 556 * no new fences have been added). 557 */ 558 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 559 if (reservation_object_trylock(resv)) { 560 if (!__read_seqcount_retry(&resv->seq, seq)) 561 reservation_object_add_excl_fence(resv, NULL); 562 reservation_object_unlock(resv); 563 } 564 } 565 566 return timeout; 567 } 568 569 static void __fence_set_priority(struct dma_fence *fence, 570 const struct i915_sched_attr *attr) 571 { 572 struct i915_request *rq; 573 struct intel_engine_cs *engine; 574 575 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 576 return; 577 578 rq = to_request(fence); 579 engine = rq->engine; 580 581 local_bh_disable(); 582 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 583 if (engine->schedule) 584 engine->schedule(rq, attr); 585 rcu_read_unlock(); 586 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 587 } 588 589 static void fence_set_priority(struct dma_fence *fence, 590 const struct i915_sched_attr *attr) 591 { 592 /* Recurse once into a fence-array */ 593 if (dma_fence_is_array(fence)) { 594 struct dma_fence_array *array = to_dma_fence_array(fence); 595 int i; 596 597 for (i = 0; i < array->num_fences; i++) 598 __fence_set_priority(array->fences[i], attr); 599 } else { 600 __fence_set_priority(fence, attr); 601 } 602 } 603 604 int 605 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 606 unsigned int flags, 607 const struct i915_sched_attr *attr) 608 { 609 struct dma_fence *excl; 610 611 if (flags & I915_WAIT_ALL) { 612 struct dma_fence **shared; 613 unsigned int count, i; 614 int ret; 615 616 ret = reservation_object_get_fences_rcu(obj->resv, 617 &excl, &count, &shared); 618 if (ret) 619 return ret; 620 621 for (i = 0; i < count; i++) { 622 fence_set_priority(shared[i], attr); 623 dma_fence_put(shared[i]); 624 } 625 626 kfree(shared); 627 } else { 628 excl = reservation_object_get_excl_rcu(obj->resv); 629 } 630 631 if (excl) { 632 fence_set_priority(excl, attr); 633 dma_fence_put(excl); 634 } 635 return 0; 636 } 637 638 /** 639 * Waits for rendering to the object to be completed 640 * @obj: i915 gem object 641 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 642 * @timeout: how long to wait 643 * @rps_client: client (user process) to charge for any waitboosting 644 */ 645 int 646 i915_gem_object_wait(struct drm_i915_gem_object *obj, 647 unsigned int flags, 648 long timeout, 649 struct intel_rps_client *rps_client) 650 { 651 might_sleep(); 652 #if IS_ENABLED(CONFIG_LOCKDEP) 653 GEM_BUG_ON(debug_locks && 654 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 655 !!(flags & I915_WAIT_LOCKED)); 656 #endif 657 GEM_BUG_ON(timeout < 0); 658 659 timeout = i915_gem_object_wait_reservation(obj->resv, 660 flags, timeout, 661 rps_client); 662 return timeout < 0 ? 
timeout : 0; 663 } 664 665 static struct intel_rps_client *to_rps_client(struct drm_file *file) 666 { 667 struct drm_i915_file_private *fpriv = file->driver_priv; 668 669 return &fpriv->rps_client; 670 } 671 672 static int 673 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 674 struct drm_i915_gem_pwrite *args, 675 struct drm_file *file) 676 { 677 void *vaddr = obj->phys_handle->vaddr + args->offset; 678 char __user *user_data = u64_to_user_ptr(args->data_ptr); 679 680 /* We manually control the domain here and pretend that it 681 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 682 */ 683 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 684 if (copy_from_user(vaddr, user_data, args->size)) 685 return -EFAULT; 686 687 drm_clflush_virt_range(vaddr, args->size); 688 i915_gem_chipset_flush(to_i915(obj->base.dev)); 689 690 intel_fb_obj_flush(obj, ORIGIN_CPU); 691 return 0; 692 } 693 694 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 695 { 696 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 697 } 698 699 void i915_gem_object_free(struct drm_i915_gem_object *obj) 700 { 701 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 702 kmem_cache_free(dev_priv->objects, obj); 703 } 704 705 static int 706 i915_gem_create(struct drm_file *file, 707 struct drm_i915_private *dev_priv, 708 uint64_t size, 709 uint32_t *handle_p) 710 { 711 struct drm_i915_gem_object *obj; 712 int ret; 713 u32 handle; 714 715 size = roundup(size, PAGE_SIZE); 716 if (size == 0) 717 return -EINVAL; 718 719 /* Allocate the new object */ 720 obj = i915_gem_object_create(dev_priv, size); 721 if (IS_ERR(obj)) 722 return PTR_ERR(obj); 723 724 ret = drm_gem_handle_create(file, &obj->base, &handle); 725 /* drop reference from allocate - handle holds it now */ 726 i915_gem_object_put(obj); 727 if (ret) 728 return ret; 729 730 *handle_p = handle; 731 return 0; 732 } 733 734 int 735 i915_gem_dumb_create(struct drm_file *file, 736 struct drm_device *dev, 737 struct drm_mode_create_dumb *args) 738 { 739 /* have to work out size/pitch and return them */ 740 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 741 args->size = args->pitch * args->height; 742 return i915_gem_create(file, to_i915(dev), 743 args->size, &args->handle); 744 } 745 746 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 747 { 748 return !(obj->cache_level == I915_CACHE_NONE || 749 obj->cache_level == I915_CACHE_WT); 750 } 751 752 /** 753 * Creates a new mm object and returns a handle to it. 754 * @dev: drm device pointer 755 * @data: ioctl data blob 756 * @file: drm file pointer 757 */ 758 int 759 i915_gem_create_ioctl(struct drm_device *dev, void *data, 760 struct drm_file *file) 761 { 762 struct drm_i915_private *dev_priv = to_i915(dev); 763 struct drm_i915_gem_create *args = data; 764 765 i915_gem_flush_free_objects(dev_priv); 766 767 return i915_gem_create(file, dev_priv, 768 args->size, &args->handle); 769 } 770 771 static inline enum fb_op_origin 772 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 773 { 774 return (domain == I915_GEM_DOMAIN_GTT ? 775 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 776 } 777 778 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 779 { 780 /* 781 * No actual flushing is required for the GTT write domain for reads 782 * from the GTT domain. Writes to it "immediately" go to main memory 783 * as far as we know, so there's no chipset flush. It also doesn't 784 * land in the GPU render cache. 
785 * 786 * However, we do have to enforce the order so that all writes through 787 * the GTT land before any writes to the device, such as updates to 788 * the GATT itself. 789 * 790 * We also have to wait a bit for the writes to land from the GTT. 791 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 792 * timing. This issue has only been observed when switching quickly 793 * between GTT writes and CPU reads from inside the kernel on recent hw, 794 * and it appears to only affect discrete GTT blocks (i.e. on LLC 795 * system agents we cannot reproduce this behaviour, until Cannonlake 796 * that was!). 797 */ 798 799 wmb(); 800 801 intel_runtime_pm_get(dev_priv); 802 spin_lock_irq(&dev_priv->uncore.lock); 803 804 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 805 806 spin_unlock_irq(&dev_priv->uncore.lock); 807 intel_runtime_pm_put(dev_priv); 808 } 809 810 static void 811 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 812 { 813 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 814 struct i915_vma *vma; 815 816 if (!(obj->write_domain & flush_domains)) 817 return; 818 819 switch (obj->write_domain) { 820 case I915_GEM_DOMAIN_GTT: 821 i915_gem_flush_ggtt_writes(dev_priv); 822 823 intel_fb_obj_flush(obj, 824 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 825 826 for_each_ggtt_vma(vma, obj) { 827 if (vma->iomap) 828 continue; 829 830 i915_vma_unset_ggtt_write(vma); 831 } 832 break; 833 834 case I915_GEM_DOMAIN_CPU: 835 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 836 break; 837 838 case I915_GEM_DOMAIN_RENDER: 839 if (gpu_write_needs_clflush(obj)) 840 obj->cache_dirty = true; 841 break; 842 } 843 844 obj->write_domain = 0; 845 } 846 847 static inline int 848 __copy_to_user_swizzled(char __user *cpu_vaddr, 849 const char *gpu_vaddr, int gpu_offset, 850 int length) 851 { 852 int ret, cpu_offset = 0; 853 854 while (length > 0) { 855 int cacheline_end = ALIGN(gpu_offset + 1, 64); 856 int this_length = min(cacheline_end - gpu_offset, length); 857 int swizzled_gpu_offset = gpu_offset ^ 64; 858 859 ret = __copy_to_user(cpu_vaddr + cpu_offset, 860 gpu_vaddr + swizzled_gpu_offset, 861 this_length); 862 if (ret) 863 return ret + length; 864 865 cpu_offset += this_length; 866 gpu_offset += this_length; 867 length -= this_length; 868 } 869 870 return 0; 871 } 872 873 static inline int 874 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 875 const char __user *cpu_vaddr, 876 int length) 877 { 878 int ret, cpu_offset = 0; 879 880 while (length > 0) { 881 int cacheline_end = ALIGN(gpu_offset + 1, 64); 882 int this_length = min(cacheline_end - gpu_offset, length); 883 int swizzled_gpu_offset = gpu_offset ^ 64; 884 885 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 886 cpu_vaddr + cpu_offset, 887 this_length); 888 if (ret) 889 return ret + length; 890 891 cpu_offset += this_length; 892 gpu_offset += this_length; 893 length -= this_length; 894 } 895 896 return 0; 897 } 898 899 /* 900 * Pins the specified object's pages and synchronizes the object with 901 * GPU accesses. Sets needs_clflush to non-zero if the caller should 902 * flush the object from the CPU cache. 
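 *
 * A rough sketch of the expected calling pattern (illustrative only; see
 * i915_gem_shmem_pread() later in this file for the real usage):
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy out of the object's pages, clflushing first if CLFLUSH_BEFORE
 *	    is set in needs_clflush ...
 *	i915_gem_obj_finish_shmem_access(obj);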
903 */ 904 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 905 unsigned int *needs_clflush) 906 { 907 int ret; 908 909 lockdep_assert_held(&obj->base.dev->struct_mutex); 910 911 *needs_clflush = 0; 912 if (!i915_gem_object_has_struct_page(obj)) 913 return -ENODEV; 914 915 ret = i915_gem_object_wait(obj, 916 I915_WAIT_INTERRUPTIBLE | 917 I915_WAIT_LOCKED, 918 MAX_SCHEDULE_TIMEOUT, 919 NULL); 920 if (ret) 921 return ret; 922 923 ret = i915_gem_object_pin_pages(obj); 924 if (ret) 925 return ret; 926 927 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 928 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 929 ret = i915_gem_object_set_to_cpu_domain(obj, false); 930 if (ret) 931 goto err_unpin; 932 else 933 goto out; 934 } 935 936 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 937 938 /* If we're not in the cpu read domain, set ourself into the gtt 939 * read domain and manually flush cachelines (if required). This 940 * optimizes for the case when the gpu will dirty the data 941 * anyway again before the next pread happens. 942 */ 943 if (!obj->cache_dirty && 944 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 945 *needs_clflush = CLFLUSH_BEFORE; 946 947 out: 948 /* return with the pages pinned */ 949 return 0; 950 951 err_unpin: 952 i915_gem_object_unpin_pages(obj); 953 return ret; 954 } 955 956 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 957 unsigned int *needs_clflush) 958 { 959 int ret; 960 961 lockdep_assert_held(&obj->base.dev->struct_mutex); 962 963 *needs_clflush = 0; 964 if (!i915_gem_object_has_struct_page(obj)) 965 return -ENODEV; 966 967 ret = i915_gem_object_wait(obj, 968 I915_WAIT_INTERRUPTIBLE | 969 I915_WAIT_LOCKED | 970 I915_WAIT_ALL, 971 MAX_SCHEDULE_TIMEOUT, 972 NULL); 973 if (ret) 974 return ret; 975 976 ret = i915_gem_object_pin_pages(obj); 977 if (ret) 978 return ret; 979 980 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 981 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 982 ret = i915_gem_object_set_to_cpu_domain(obj, true); 983 if (ret) 984 goto err_unpin; 985 else 986 goto out; 987 } 988 989 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 990 991 /* If we're not in the cpu write domain, set ourself into the 992 * gtt write domain and manually flush cachelines (as required). 993 * This optimizes for the case when the gpu will use the data 994 * right away and we therefore have to clflush anyway. 995 */ 996 if (!obj->cache_dirty) { 997 *needs_clflush |= CLFLUSH_AFTER; 998 999 /* 1000 * Same trick applies to invalidate partially written 1001 * cachelines read before writing. 1002 */ 1003 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 1004 *needs_clflush |= CLFLUSH_BEFORE; 1005 } 1006 1007 out: 1008 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1009 obj->mm.dirty = true; 1010 /* return with the pages pinned */ 1011 return 0; 1012 1013 err_unpin: 1014 i915_gem_object_unpin_pages(obj); 1015 return ret; 1016 } 1017 1018 static void 1019 shmem_clflush_swizzled_range(char *addr, unsigned long length, 1020 bool swizzled) 1021 { 1022 if (unlikely(swizzled)) { 1023 unsigned long start = (unsigned long) addr; 1024 unsigned long end = (unsigned long) addr + length; 1025 1026 /* For swizzling simply ensure that we always flush both 1027 * channels. Lame, but simple and it works. Swizzled 1028 * pwrite/pread is far from a hotpath - current userspace 1029 * doesn't use it at all. 
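		 *
		 * (Rounding rationale: bit-17 swizzling flips bit 6 of the
		 * address, i.e. it swaps the two 64-byte halves of each
		 * 128-byte block, so flushing out to 128-byte boundaries
		 * below should cover both the swizzled and unswizzled
		 * location of every cacheline.)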
*/ 1030 start = round_down(start, 128); 1031 end = round_up(end, 128); 1032 1033 drm_clflush_virt_range((void *)start, end - start); 1034 } else { 1035 drm_clflush_virt_range(addr, length); 1036 } 1037 1038 } 1039 1040 /* Only difference to the fast-path function is that this can handle bit17 1041 * and uses non-atomic copy and kmap functions. */ 1042 static int 1043 shmem_pread_slow(struct page *page, int offset, int length, 1044 char __user *user_data, 1045 bool page_do_bit17_swizzling, bool needs_clflush) 1046 { 1047 char *vaddr; 1048 int ret; 1049 1050 vaddr = kmap(page); 1051 if (needs_clflush) 1052 shmem_clflush_swizzled_range(vaddr + offset, length, 1053 page_do_bit17_swizzling); 1054 1055 if (page_do_bit17_swizzling) 1056 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 1057 else 1058 ret = __copy_to_user(user_data, vaddr + offset, length); 1059 kunmap(page); 1060 1061 return ret ? - EFAULT : 0; 1062 } 1063 1064 static int 1065 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 1066 bool page_do_bit17_swizzling, bool needs_clflush) 1067 { 1068 int ret; 1069 1070 ret = -ENODEV; 1071 if (!page_do_bit17_swizzling) { 1072 char *vaddr = kmap_atomic(page); 1073 1074 if (needs_clflush) 1075 drm_clflush_virt_range(vaddr + offset, length); 1076 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1077 kunmap_atomic(vaddr); 1078 } 1079 if (ret == 0) 1080 return 0; 1081 1082 return shmem_pread_slow(page, offset, length, user_data, 1083 page_do_bit17_swizzling, needs_clflush); 1084 } 1085 1086 static int 1087 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1088 struct drm_i915_gem_pread *args) 1089 { 1090 char __user *user_data; 1091 u64 remain; 1092 unsigned int obj_do_bit17_swizzling; 1093 unsigned int needs_clflush; 1094 unsigned int idx, offset; 1095 int ret; 1096 1097 obj_do_bit17_swizzling = 0; 1098 if (i915_gem_object_needs_bit17_swizzle(obj)) 1099 obj_do_bit17_swizzling = BIT(17); 1100 1101 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1102 if (ret) 1103 return ret; 1104 1105 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1106 mutex_unlock(&obj->base.dev->struct_mutex); 1107 if (ret) 1108 return ret; 1109 1110 remain = args->size; 1111 user_data = u64_to_user_ptr(args->data_ptr); 1112 offset = offset_in_page(args->offset); 1113 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1114 struct page *page = i915_gem_object_get_page(obj, idx); 1115 int length; 1116 1117 length = remain; 1118 if (offset + length > PAGE_SIZE) 1119 length = PAGE_SIZE - offset; 1120 1121 ret = shmem_pread(page, offset, length, user_data, 1122 page_to_phys(page) & obj_do_bit17_swizzling, 1123 needs_clflush); 1124 if (ret) 1125 break; 1126 1127 remain -= length; 1128 user_data += length; 1129 offset = 0; 1130 } 1131 1132 i915_gem_obj_finish_shmem_access(obj); 1133 return ret; 1134 } 1135 1136 static inline bool 1137 gtt_user_read(struct io_mapping *mapping, 1138 loff_t base, int offset, 1139 char __user *user_data, int length) 1140 { 1141 void __iomem *vaddr; 1142 unsigned long unwritten; 1143 1144 /* We can use the cpu mem copy function because this is X86. 
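	 * If the atomic copy below faults (we cannot sleep to page in the
	 * user buffer while the atomic mapping is held), the number of
	 * uncopied bytes is returned and we retry with a regular,
	 * sleepable WC mapping and plain copy_to_user().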
*/ 1145 vaddr = io_mapping_map_atomic_wc(mapping, base); 1146 unwritten = __copy_to_user_inatomic(user_data, 1147 (void __force *)vaddr + offset, 1148 length); 1149 io_mapping_unmap_atomic(vaddr); 1150 if (unwritten) { 1151 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1152 unwritten = copy_to_user(user_data, 1153 (void __force *)vaddr + offset, 1154 length); 1155 io_mapping_unmap(vaddr); 1156 } 1157 return unwritten; 1158 } 1159 1160 static int 1161 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1162 const struct drm_i915_gem_pread *args) 1163 { 1164 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1165 struct i915_ggtt *ggtt = &i915->ggtt; 1166 struct drm_mm_node node; 1167 struct i915_vma *vma; 1168 void __user *user_data; 1169 u64 remain, offset; 1170 int ret; 1171 1172 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1173 if (ret) 1174 return ret; 1175 1176 intel_runtime_pm_get(i915); 1177 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1178 PIN_MAPPABLE | 1179 PIN_NONFAULT | 1180 PIN_NONBLOCK); 1181 if (!IS_ERR(vma)) { 1182 node.start = i915_ggtt_offset(vma); 1183 node.allocated = false; 1184 ret = i915_vma_put_fence(vma); 1185 if (ret) { 1186 i915_vma_unpin(vma); 1187 vma = ERR_PTR(ret); 1188 } 1189 } 1190 if (IS_ERR(vma)) { 1191 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1192 if (ret) 1193 goto out_unlock; 1194 GEM_BUG_ON(!node.allocated); 1195 } 1196 1197 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1198 if (ret) 1199 goto out_unpin; 1200 1201 mutex_unlock(&i915->drm.struct_mutex); 1202 1203 user_data = u64_to_user_ptr(args->data_ptr); 1204 remain = args->size; 1205 offset = args->offset; 1206 1207 while (remain > 0) { 1208 /* Operation in this page 1209 * 1210 * page_base = page offset within aperture 1211 * page_offset = offset within page 1212 * page_length = bytes to copy for this page 1213 */ 1214 u32 page_base = node.start; 1215 unsigned page_offset = offset_in_page(offset); 1216 unsigned page_length = PAGE_SIZE - page_offset; 1217 page_length = remain < page_length ? remain : page_length; 1218 if (node.allocated) { 1219 wmb(); 1220 ggtt->base.insert_page(&ggtt->base, 1221 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1222 node.start, I915_CACHE_NONE, 0); 1223 wmb(); 1224 } else { 1225 page_base += offset & PAGE_MASK; 1226 } 1227 1228 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1229 user_data, page_length)) { 1230 ret = -EFAULT; 1231 break; 1232 } 1233 1234 remain -= page_length; 1235 user_data += page_length; 1236 offset += page_length; 1237 } 1238 1239 mutex_lock(&i915->drm.struct_mutex); 1240 out_unpin: 1241 if (node.allocated) { 1242 wmb(); 1243 ggtt->base.clear_range(&ggtt->base, 1244 node.start, node.size); 1245 remove_mappable_node(&node); 1246 } else { 1247 i915_vma_unpin(vma); 1248 } 1249 out_unlock: 1250 intel_runtime_pm_put(i915); 1251 mutex_unlock(&i915->drm.struct_mutex); 1252 1253 return ret; 1254 } 1255 1256 /** 1257 * Reads data from the object referenced by handle. 1258 * @dev: drm device pointer 1259 * @data: ioctl data blob 1260 * @file: drm file pointer 1261 * 1262 * On error, the contents of *data are undefined. 
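 *
 * For illustration only, a userspace caller fills struct drm_i915_gem_pread
 * (uapi/drm/i915_drm.h) and issues DRM_IOCTL_I915_GEM_PREAD, roughly:
 *
 *	struct drm_i915_gem_pread pread = {
 *		.handle   = handle,
 *		.offset   = 0,
 *		.size     = length,
 *		.data_ptr = (__u64)(uintptr_t)buf,
 *	};
 *	err = ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 *
 * where fd, handle, buf and length are placeholders supplied by the caller.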
1263 */ 1264 int 1265 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1266 struct drm_file *file) 1267 { 1268 struct drm_i915_gem_pread *args = data; 1269 struct drm_i915_gem_object *obj; 1270 int ret; 1271 1272 if (args->size == 0) 1273 return 0; 1274 1275 if (!access_ok(VERIFY_WRITE, 1276 u64_to_user_ptr(args->data_ptr), 1277 args->size)) 1278 return -EFAULT; 1279 1280 obj = i915_gem_object_lookup(file, args->handle); 1281 if (!obj) 1282 return -ENOENT; 1283 1284 /* Bounds check source. */ 1285 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1286 ret = -EINVAL; 1287 goto out; 1288 } 1289 1290 trace_i915_gem_object_pread(obj, args->offset, args->size); 1291 1292 ret = i915_gem_object_wait(obj, 1293 I915_WAIT_INTERRUPTIBLE, 1294 MAX_SCHEDULE_TIMEOUT, 1295 to_rps_client(file)); 1296 if (ret) 1297 goto out; 1298 1299 ret = i915_gem_object_pin_pages(obj); 1300 if (ret) 1301 goto out; 1302 1303 ret = i915_gem_shmem_pread(obj, args); 1304 if (ret == -EFAULT || ret == -ENODEV) 1305 ret = i915_gem_gtt_pread(obj, args); 1306 1307 i915_gem_object_unpin_pages(obj); 1308 out: 1309 i915_gem_object_put(obj); 1310 return ret; 1311 } 1312 1313 /* This is the fast write path which cannot handle 1314 * page faults in the source data 1315 */ 1316 1317 static inline bool 1318 ggtt_write(struct io_mapping *mapping, 1319 loff_t base, int offset, 1320 char __user *user_data, int length) 1321 { 1322 void __iomem *vaddr; 1323 unsigned long unwritten; 1324 1325 /* We can use the cpu mem copy function because this is X86. */ 1326 vaddr = io_mapping_map_atomic_wc(mapping, base); 1327 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1328 user_data, length); 1329 io_mapping_unmap_atomic(vaddr); 1330 if (unwritten) { 1331 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1332 unwritten = copy_from_user((void __force *)vaddr + offset, 1333 user_data, length); 1334 io_mapping_unmap(vaddr); 1335 } 1336 1337 return unwritten; 1338 } 1339 1340 /** 1341 * This is the fast pwrite path, where we copy the data directly from the 1342 * user into the GTT, uncached. 1343 * @obj: i915 GEM object 1344 * @args: pwrite arguments structure 1345 */ 1346 static int 1347 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1348 const struct drm_i915_gem_pwrite *args) 1349 { 1350 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1351 struct i915_ggtt *ggtt = &i915->ggtt; 1352 struct drm_mm_node node; 1353 struct i915_vma *vma; 1354 u64 remain, offset; 1355 void __user *user_data; 1356 int ret; 1357 1358 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1359 if (ret) 1360 return ret; 1361 1362 if (i915_gem_object_has_struct_page(obj)) { 1363 /* 1364 * Avoid waking the device up if we can fallback, as 1365 * waking/resuming is very slow (worst-case 10-100 ms 1366 * depending on PCI sleeps and our own resume time). 1367 * This easily dwarfs any performance advantage from 1368 * using the cache bypass of indirect GGTT access. 
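		 *
		 * Failing with -EFAULT here is deliberate: the caller,
		 * i915_gem_pwrite_ioctl(), takes that as its cue to fall
		 * back to the shmem pwrite path rather than wake the device.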
1369 */ 1370 if (!intel_runtime_pm_get_if_in_use(i915)) { 1371 ret = -EFAULT; 1372 goto out_unlock; 1373 } 1374 } else { 1375 /* No backing pages, no fallback, we must force GGTT access */ 1376 intel_runtime_pm_get(i915); 1377 } 1378 1379 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1380 PIN_MAPPABLE | 1381 PIN_NONFAULT | 1382 PIN_NONBLOCK); 1383 if (!IS_ERR(vma)) { 1384 node.start = i915_ggtt_offset(vma); 1385 node.allocated = false; 1386 ret = i915_vma_put_fence(vma); 1387 if (ret) { 1388 i915_vma_unpin(vma); 1389 vma = ERR_PTR(ret); 1390 } 1391 } 1392 if (IS_ERR(vma)) { 1393 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1394 if (ret) 1395 goto out_rpm; 1396 GEM_BUG_ON(!node.allocated); 1397 } 1398 1399 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1400 if (ret) 1401 goto out_unpin; 1402 1403 mutex_unlock(&i915->drm.struct_mutex); 1404 1405 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1406 1407 user_data = u64_to_user_ptr(args->data_ptr); 1408 offset = args->offset; 1409 remain = args->size; 1410 while (remain) { 1411 /* Operation in this page 1412 * 1413 * page_base = page offset within aperture 1414 * page_offset = offset within page 1415 * page_length = bytes to copy for this page 1416 */ 1417 u32 page_base = node.start; 1418 unsigned int page_offset = offset_in_page(offset); 1419 unsigned int page_length = PAGE_SIZE - page_offset; 1420 page_length = remain < page_length ? remain : page_length; 1421 if (node.allocated) { 1422 wmb(); /* flush the write before we modify the GGTT */ 1423 ggtt->base.insert_page(&ggtt->base, 1424 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1425 node.start, I915_CACHE_NONE, 0); 1426 wmb(); /* flush modifications to the GGTT (insert_page) */ 1427 } else { 1428 page_base += offset & PAGE_MASK; 1429 } 1430 /* If we get a fault while copying data, then (presumably) our 1431 * source page isn't available. Return the error and we'll 1432 * retry in the slow path. 1433 * If the object is non-shmem backed, we retry again with the 1434 * path that handles page fault. 1435 */ 1436 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1437 user_data, page_length)) { 1438 ret = -EFAULT; 1439 break; 1440 } 1441 1442 remain -= page_length; 1443 user_data += page_length; 1444 offset += page_length; 1445 } 1446 intel_fb_obj_flush(obj, ORIGIN_CPU); 1447 1448 mutex_lock(&i915->drm.struct_mutex); 1449 out_unpin: 1450 if (node.allocated) { 1451 wmb(); 1452 ggtt->base.clear_range(&ggtt->base, 1453 node.start, node.size); 1454 remove_mappable_node(&node); 1455 } else { 1456 i915_vma_unpin(vma); 1457 } 1458 out_rpm: 1459 intel_runtime_pm_put(i915); 1460 out_unlock: 1461 mutex_unlock(&i915->drm.struct_mutex); 1462 return ret; 1463 } 1464 1465 static int 1466 shmem_pwrite_slow(struct page *page, int offset, int length, 1467 char __user *user_data, 1468 bool page_do_bit17_swizzling, 1469 bool needs_clflush_before, 1470 bool needs_clflush_after) 1471 { 1472 char *vaddr; 1473 int ret; 1474 1475 vaddr = kmap(page); 1476 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1477 shmem_clflush_swizzled_range(vaddr + offset, length, 1478 page_do_bit17_swizzling); 1479 if (page_do_bit17_swizzling) 1480 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1481 length); 1482 else 1483 ret = __copy_from_user(vaddr + offset, user_data, length); 1484 if (needs_clflush_after) 1485 shmem_clflush_swizzled_range(vaddr + offset, length, 1486 page_do_bit17_swizzling); 1487 kunmap(page); 1488 1489 return ret ? 
-EFAULT : 0; 1490 } 1491 1492 /* Per-page copy function for the shmem pwrite fastpath. 1493 * Flushes invalid cachelines before writing to the target if 1494 * needs_clflush_before is set and flushes out any written cachelines after 1495 * writing if needs_clflush is set. 1496 */ 1497 static int 1498 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1499 bool page_do_bit17_swizzling, 1500 bool needs_clflush_before, 1501 bool needs_clflush_after) 1502 { 1503 int ret; 1504 1505 ret = -ENODEV; 1506 if (!page_do_bit17_swizzling) { 1507 char *vaddr = kmap_atomic(page); 1508 1509 if (needs_clflush_before) 1510 drm_clflush_virt_range(vaddr + offset, len); 1511 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1512 if (needs_clflush_after) 1513 drm_clflush_virt_range(vaddr + offset, len); 1514 1515 kunmap_atomic(vaddr); 1516 } 1517 if (ret == 0) 1518 return ret; 1519 1520 return shmem_pwrite_slow(page, offset, len, user_data, 1521 page_do_bit17_swizzling, 1522 needs_clflush_before, 1523 needs_clflush_after); 1524 } 1525 1526 static int 1527 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1528 const struct drm_i915_gem_pwrite *args) 1529 { 1530 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1531 void __user *user_data; 1532 u64 remain; 1533 unsigned int obj_do_bit17_swizzling; 1534 unsigned int partial_cacheline_write; 1535 unsigned int needs_clflush; 1536 unsigned int offset, idx; 1537 int ret; 1538 1539 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1540 if (ret) 1541 return ret; 1542 1543 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1544 mutex_unlock(&i915->drm.struct_mutex); 1545 if (ret) 1546 return ret; 1547 1548 obj_do_bit17_swizzling = 0; 1549 if (i915_gem_object_needs_bit17_swizzle(obj)) 1550 obj_do_bit17_swizzling = BIT(17); 1551 1552 /* If we don't overwrite a cacheline completely we need to be 1553 * careful to have up-to-date data by first clflushing. Don't 1554 * overcomplicate things and flush the entire patch. 1555 */ 1556 partial_cacheline_write = 0; 1557 if (needs_clflush & CLFLUSH_BEFORE) 1558 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1559 1560 user_data = u64_to_user_ptr(args->data_ptr); 1561 remain = args->size; 1562 offset = offset_in_page(args->offset); 1563 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1564 struct page *page = i915_gem_object_get_page(obj, idx); 1565 int length; 1566 1567 length = remain; 1568 if (offset + length > PAGE_SIZE) 1569 length = PAGE_SIZE - offset; 1570 1571 ret = shmem_pwrite(page, offset, length, user_data, 1572 page_to_phys(page) & obj_do_bit17_swizzling, 1573 (offset | length) & partial_cacheline_write, 1574 needs_clflush & CLFLUSH_AFTER); 1575 if (ret) 1576 break; 1577 1578 remain -= length; 1579 user_data += length; 1580 offset = 0; 1581 } 1582 1583 intel_fb_obj_flush(obj, ORIGIN_CPU); 1584 i915_gem_obj_finish_shmem_access(obj); 1585 return ret; 1586 } 1587 1588 /** 1589 * Writes data to the object referenced by handle. 1590 * @dev: drm device 1591 * @data: ioctl data blob 1592 * @file: drm file 1593 * 1594 * On error, the contents of the buffer that were to be modified are undefined. 
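 *
 * As with pread, this is driven from userspace via DRM_IOCTL_I915_GEM_PWRITE;
 * a rough, illustrative sketch with placeholder fd/handle/buf/length:
 *
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle   = handle,
 *		.offset   = 0,
 *		.size     = length,
 *		.data_ptr = (__u64)(uintptr_t)buf,
 *	};
 *	err = ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);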
1595 */ 1596 int 1597 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1598 struct drm_file *file) 1599 { 1600 struct drm_i915_gem_pwrite *args = data; 1601 struct drm_i915_gem_object *obj; 1602 int ret; 1603 1604 if (args->size == 0) 1605 return 0; 1606 1607 if (!access_ok(VERIFY_READ, 1608 u64_to_user_ptr(args->data_ptr), 1609 args->size)) 1610 return -EFAULT; 1611 1612 obj = i915_gem_object_lookup(file, args->handle); 1613 if (!obj) 1614 return -ENOENT; 1615 1616 /* Bounds check destination. */ 1617 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1618 ret = -EINVAL; 1619 goto err; 1620 } 1621 1622 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1623 1624 ret = -ENODEV; 1625 if (obj->ops->pwrite) 1626 ret = obj->ops->pwrite(obj, args); 1627 if (ret != -ENODEV) 1628 goto err; 1629 1630 ret = i915_gem_object_wait(obj, 1631 I915_WAIT_INTERRUPTIBLE | 1632 I915_WAIT_ALL, 1633 MAX_SCHEDULE_TIMEOUT, 1634 to_rps_client(file)); 1635 if (ret) 1636 goto err; 1637 1638 ret = i915_gem_object_pin_pages(obj); 1639 if (ret) 1640 goto err; 1641 1642 ret = -EFAULT; 1643 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1644 * it would end up going through the fenced access, and we'll get 1645 * different detiling behavior between reading and writing. 1646 * pread/pwrite currently are reading and writing from the CPU 1647 * perspective, requiring manual detiling by the client. 1648 */ 1649 if (!i915_gem_object_has_struct_page(obj) || 1650 cpu_write_needs_clflush(obj)) 1651 /* Note that the gtt paths might fail with non-page-backed user 1652 * pointers (e.g. gtt mappings when moving data between 1653 * textures). Fallback to the shmem path in that case. 1654 */ 1655 ret = i915_gem_gtt_pwrite_fast(obj, args); 1656 1657 if (ret == -EFAULT || ret == -ENOSPC) { 1658 if (obj->phys_handle) 1659 ret = i915_gem_phys_pwrite(obj, args, file); 1660 else 1661 ret = i915_gem_shmem_pwrite(obj, args); 1662 } 1663 1664 i915_gem_object_unpin_pages(obj); 1665 err: 1666 i915_gem_object_put(obj); 1667 return ret; 1668 } 1669 1670 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1671 { 1672 struct drm_i915_private *i915; 1673 struct list_head *list; 1674 struct i915_vma *vma; 1675 1676 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1677 1678 for_each_ggtt_vma(vma, obj) { 1679 if (i915_vma_is_active(vma)) 1680 continue; 1681 1682 if (!drm_mm_node_allocated(&vma->node)) 1683 continue; 1684 1685 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1686 } 1687 1688 i915 = to_i915(obj->base.dev); 1689 spin_lock(&i915->mm.obj_lock); 1690 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1691 list_move_tail(&obj->mm.link, list); 1692 spin_unlock(&i915->mm.obj_lock); 1693 } 1694 1695 /** 1696 * Called when user space prepares to use an object with the CPU, either 1697 * through the mmap ioctl's mapping or a GTT mapping. 1698 * @dev: drm device 1699 * @data: ioctl data blob 1700 * @file: drm file 1701 */ 1702 int 1703 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1704 struct drm_file *file) 1705 { 1706 struct drm_i915_gem_set_domain *args = data; 1707 struct drm_i915_gem_object *obj; 1708 uint32_t read_domains = args->read_domains; 1709 uint32_t write_domain = args->write_domain; 1710 int err; 1711 1712 /* Only handle setting domains to types used by the CPU. 
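	 * (Illustrative only: a userspace caller asking for direct CPU access
	 * would pass something like .read_domains = I915_GEM_DOMAIN_CPU and,
	 * if it intends to write, .write_domain = I915_GEM_DOMAIN_CPU in
	 * struct drm_i915_gem_set_domain; anything naming a GPU domain is
	 * rejected below.)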
*/ 1713 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1714 return -EINVAL; 1715 1716 /* Having something in the write domain implies it's in the read 1717 * domain, and only that read domain. Enforce that in the request. 1718 */ 1719 if (write_domain != 0 && read_domains != write_domain) 1720 return -EINVAL; 1721 1722 obj = i915_gem_object_lookup(file, args->handle); 1723 if (!obj) 1724 return -ENOENT; 1725 1726 /* Try to flush the object off the GPU without holding the lock. 1727 * We will repeat the flush holding the lock in the normal manner 1728 * to catch cases where we are gazumped. 1729 */ 1730 err = i915_gem_object_wait(obj, 1731 I915_WAIT_INTERRUPTIBLE | 1732 (write_domain ? I915_WAIT_ALL : 0), 1733 MAX_SCHEDULE_TIMEOUT, 1734 to_rps_client(file)); 1735 if (err) 1736 goto out; 1737 1738 /* 1739 * Proxy objects do not control access to the backing storage, ergo 1740 * they cannot be used as a means to manipulate the cache domain 1741 * tracking for that backing storage. The proxy object is always 1742 * considered to be outside of any cache domain. 1743 */ 1744 if (i915_gem_object_is_proxy(obj)) { 1745 err = -ENXIO; 1746 goto out; 1747 } 1748 1749 /* 1750 * Flush and acquire obj->pages so that we are coherent through 1751 * direct access in memory with previous cached writes through 1752 * shmemfs and that our cache domain tracking remains valid. 1753 * For example, if the obj->filp was moved to swap without us 1754 * being notified and releasing the pages, we would mistakenly 1755 * continue to assume that the obj remained out of the CPU cached 1756 * domain. 1757 */ 1758 err = i915_gem_object_pin_pages(obj); 1759 if (err) 1760 goto out; 1761 1762 err = i915_mutex_lock_interruptible(dev); 1763 if (err) 1764 goto out_unpin; 1765 1766 if (read_domains & I915_GEM_DOMAIN_WC) 1767 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1768 else if (read_domains & I915_GEM_DOMAIN_GTT) 1769 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1770 else 1771 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1772 1773 /* And bump the LRU for this access */ 1774 i915_gem_object_bump_inactive_ggtt(obj); 1775 1776 mutex_unlock(&dev->struct_mutex); 1777 1778 if (write_domain != 0) 1779 intel_fb_obj_invalidate(obj, 1780 fb_write_origin(obj, write_domain)); 1781 1782 out_unpin: 1783 i915_gem_object_unpin_pages(obj); 1784 out: 1785 i915_gem_object_put(obj); 1786 return err; 1787 } 1788 1789 /** 1790 * Called when user space has done writes to this buffer 1791 * @dev: drm device 1792 * @data: ioctl data blob 1793 * @file: drm file 1794 */ 1795 int 1796 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1797 struct drm_file *file) 1798 { 1799 struct drm_i915_gem_sw_finish *args = data; 1800 struct drm_i915_gem_object *obj; 1801 1802 obj = i915_gem_object_lookup(file, args->handle); 1803 if (!obj) 1804 return -ENOENT; 1805 1806 /* 1807 * Proxy objects are barred from CPU access, so there is no 1808 * need to ban sw_finish as it is a nop. 1809 */ 1810 1811 /* Pinned buffers may be scanout, so flush the cache */ 1812 i915_gem_object_flush_if_display(obj); 1813 i915_gem_object_put(obj); 1814 1815 return 0; 1816 } 1817 1818 /** 1819 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1820 * it is mapped to. 1821 * @dev: drm device 1822 * @data: ioctl data blob 1823 * @file: drm file 1824 * 1825 * While the mapping holds a reference on the contents of the object, it doesn't 1826 * imply a ref on the object itself. 
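 *
 * For illustration only (placeholder fd, handle and length), a CPU mmap of
 * a shmem-backed object via this ioctl looks roughly like:
 *
 *	struct drm_i915_gem_mmap mmap_arg = {
 *		.handle = handle,
 *		.size   = length,
 *		.flags  = I915_MMAP_WC,
 *	};
 *	err = ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
 *	ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;
 *
 * where I915_MMAP_WC is optional and, as checked in the function below,
 * requires PAT.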
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do
 * GEM mmap support, please don't implement mmap support like here. The modern
 * way to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on;
 * hiding the mmap call in a driver-private ioctl will break that. The i915
 * driver only does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others.
 *    An attempt to service a GTT page fault from a snoopable object will
 *    generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence registers for display and blitter access
 *    as well). Contention of the fence registers will cause the previous
 *    users to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
int i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
	struct i915_vma *vma;
	pgoff_t page_offset;
	unsigned int flags;
	int ret;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
2014 * Upon acquiring the lock, we will perform our sanity checks and then 2015 * repeat the flush holding the lock in the normal manner to catch cases 2016 * where we are gazumped. 2017 */ 2018 ret = i915_gem_object_wait(obj, 2019 I915_WAIT_INTERRUPTIBLE, 2020 MAX_SCHEDULE_TIMEOUT, 2021 NULL); 2022 if (ret) 2023 goto err; 2024 2025 ret = i915_gem_object_pin_pages(obj); 2026 if (ret) 2027 goto err; 2028 2029 intel_runtime_pm_get(dev_priv); 2030 2031 ret = i915_mutex_lock_interruptible(dev); 2032 if (ret) 2033 goto err_rpm; 2034 2035 /* Access to snoopable pages through the GTT is incoherent. */ 2036 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2037 ret = -EFAULT; 2038 goto err_unlock; 2039 } 2040 2041 /* If the object is smaller than a couple of partial vma, it is 2042 * not worth only creating a single partial vma - we may as well 2043 * clear enough space for the full object. 2044 */ 2045 flags = PIN_MAPPABLE; 2046 if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) 2047 flags |= PIN_NONBLOCK | PIN_NONFAULT; 2048 2049 /* Now pin it into the GTT as needed */ 2050 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); 2051 if (IS_ERR(vma)) { 2052 /* Use a partial view if it is bigger than available space */ 2053 struct i915_ggtt_view view = 2054 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2055 2056 /* Userspace is now writing through an untracked VMA, abandon 2057 * all hope that the hardware is able to track future writes. 2058 */ 2059 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2060 2061 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 2062 } 2063 if (IS_ERR(vma)) { 2064 ret = PTR_ERR(vma); 2065 goto err_unlock; 2066 } 2067 2068 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2069 if (ret) 2070 goto err_unpin; 2071 2072 ret = i915_vma_pin_fence(vma); 2073 if (ret) 2074 goto err_unpin; 2075 2076 /* Finally, remap it using the new GTT offset */ 2077 ret = remap_io_mapping(area, 2078 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2079 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2080 min_t(u64, vma->size, area->vm_end - area->vm_start), 2081 &ggtt->iomap); 2082 if (ret) 2083 goto err_fence; 2084 2085 /* Mark as being mmapped into userspace for later revocation */ 2086 assert_rpm_wakelock_held(dev_priv); 2087 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2088 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2089 GEM_BUG_ON(!obj->userfault_count); 2090 2091 i915_vma_set_ggtt_write(vma); 2092 2093 err_fence: 2094 i915_vma_unpin_fence(vma); 2095 err_unpin: 2096 __i915_vma_unpin(vma); 2097 err_unlock: 2098 mutex_unlock(&dev->struct_mutex); 2099 err_rpm: 2100 intel_runtime_pm_put(dev_priv); 2101 i915_gem_object_unpin_pages(obj); 2102 err: 2103 switch (ret) { 2104 case -EIO: 2105 /* 2106 * We eat errors when the gpu is terminally wedged to avoid 2107 * userspace unduly crashing (gl has no provisions for mmaps to 2108 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2109 * and so needs to be reported. 2110 */ 2111 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2112 ret = VM_FAULT_SIGBUS; 2113 break; 2114 } 2115 case -EAGAIN: 2116 /* 2117 * EAGAIN means the gpu is hung and we'll wait for the error 2118 * handler to reset everything when re-faulting in 2119 * i915_mutex_lock_interruptible. 2120 */ 2121 case 0: 2122 case -ERESTARTSYS: 2123 case -EINTR: 2124 case -EBUSY: 2125 /* 2126 * EBUSY is ok: this just means that another thread 2127 * already did the job. 
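	 * For all of the cases above we return VM_FAULT_NOPAGE: the faulting
	 * access is simply retried, and the next attempt either finds the
	 * freshly installed PTE or repeats the fault handling.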
2128 */ 2129 ret = VM_FAULT_NOPAGE; 2130 break; 2131 case -ENOMEM: 2132 ret = VM_FAULT_OOM; 2133 break; 2134 case -ENOSPC: 2135 case -EFAULT: 2136 ret = VM_FAULT_SIGBUS; 2137 break; 2138 default: 2139 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2140 ret = VM_FAULT_SIGBUS; 2141 break; 2142 } 2143 return ret; 2144 } 2145 2146 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2147 { 2148 struct i915_vma *vma; 2149 2150 GEM_BUG_ON(!obj->userfault_count); 2151 2152 obj->userfault_count = 0; 2153 list_del(&obj->userfault_link); 2154 drm_vma_node_unmap(&obj->base.vma_node, 2155 obj->base.dev->anon_inode->i_mapping); 2156 2157 for_each_ggtt_vma(vma, obj) 2158 i915_vma_unset_userfault(vma); 2159 } 2160 2161 /** 2162 * i915_gem_release_mmap - remove physical page mappings 2163 * @obj: obj in question 2164 * 2165 * Preserve the reservation of the mmapping with the DRM core code, but 2166 * relinquish ownership of the pages back to the system. 2167 * 2168 * It is vital that we remove the page mapping if we have mapped a tiled 2169 * object through the GTT and then lose the fence register due to 2170 * resource pressure. Similarly if the object has been moved out of the 2171 * aperture, then pages mapped into userspace must be revoked. Removing the 2172 * mapping will then trigger a page fault on the next user access, allowing 2173 * fixup by i915_gem_fault(). 2174 */ 2175 void 2176 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2177 { 2178 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2179 2180 /* Serialisation between user GTT access and our code depends upon 2181 * revoking the CPU's PTE whilst the mutex is held. The next user 2182 * pagefault then has to wait until we release the mutex. 2183 * 2184 * Note that RPM complicates matters somewhat by adding an additional 2185 * requirement that operations to the GGTT be made holding the RPM 2186 * wakeref. 2187 */ 2188 lockdep_assert_held(&i915->drm.struct_mutex); 2189 intel_runtime_pm_get(i915); 2190 2191 if (!obj->userfault_count) 2192 goto out; 2193 2194 __i915_gem_object_release_mmap(obj); 2195 2196 /* Ensure that the CPU's PTEs are revoked and there are no outstanding 2197 * memory transactions from userspace before we return. The TLB 2198 * flushing implied by changing the PTE above *should* be 2199 * sufficient; an extra barrier here just provides us with a bit 2200 * of paranoid documentation about our requirement to serialise 2201 * memory writes before touching registers / GSM. 2202 */ 2203 wmb(); 2204 2205 out: 2206 intel_runtime_pm_put(i915); 2207 } 2208 2209 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2210 { 2211 struct drm_i915_gem_object *obj, *on; 2212 int i; 2213 2214 /* 2215 * Only called during RPM suspend. All users of the userfault_list 2216 * must be holding an RPM wakeref to ensure that this can not 2217 * run concurrently with themselves (and use the struct_mutex for 2218 * protection between themselves). 2219 */ 2220 2221 list_for_each_entry_safe(obj, on, 2222 &dev_priv->mm.userfault_list, userfault_link) 2223 __i915_gem_object_release_mmap(obj); 2224 2225 /* The fences will be lost when the device powers down. If any were 2226 * in use by hardware (i.e. they are pinned), we should not be powering 2227 * down! All other fences will be reacquired by the user upon waking.
2228 */ 2229 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2230 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2231 2232 /* Ideally we want to assert that the fence register is not 2233 * live at this point (i.e. that no piece of code will be 2234 * trying to write through fence + GTT, as that both violates 2235 * our tracking of activity and associated locking/barriers, 2236 * but also is illegal given that the hw is powered down). 2237 * 2238 * Previously we used reg->pin_count as a "liveness" indicator. 2239 * That is not sufficient, and we need a more fine-grained 2240 * tool if we want to have a sanity check here. 2241 */ 2242 2243 if (!reg->vma) 2244 continue; 2245 2246 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2247 reg->dirty = true; 2248 } 2249 } 2250 2251 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2252 { 2253 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2254 int err; 2255 2256 err = drm_gem_create_mmap_offset(&obj->base); 2257 if (likely(!err)) 2258 return 0; 2259 2260 /* Attempt to reap some mmap space from dead objects */ 2261 do { 2262 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2263 if (err) 2264 break; 2265 2266 i915_gem_drain_freed_objects(dev_priv); 2267 err = drm_gem_create_mmap_offset(&obj->base); 2268 if (!err) 2269 break; 2270 2271 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2272 2273 return err; 2274 } 2275 2276 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2277 { 2278 drm_gem_free_mmap_offset(&obj->base); 2279 } 2280 2281 int 2282 i915_gem_mmap_gtt(struct drm_file *file, 2283 struct drm_device *dev, 2284 uint32_t handle, 2285 uint64_t *offset) 2286 { 2287 struct drm_i915_gem_object *obj; 2288 int ret; 2289 2290 obj = i915_gem_object_lookup(file, handle); 2291 if (!obj) 2292 return -ENOENT; 2293 2294 ret = i915_gem_object_create_mmap_offset(obj); 2295 if (ret == 0) 2296 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2297 2298 i915_gem_object_put(obj); 2299 return ret; 2300 } 2301 2302 /** 2303 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2304 * @dev: DRM device 2305 * @data: GTT mapping ioctl data 2306 * @file: GEM object info 2307 * 2308 * Simply returns the fake offset to userspace so it can mmap it. 2309 * The mmap call will end up in drm_gem_mmap(), which will set things 2310 * up so we can get faults in the handler above. 2311 * 2312 * The fault handler will take care of binding the object into the GTT 2313 * (since it may have been evicted to make room for something), allocating 2314 * a fence register, and mapping the appropriate aperture address into 2315 * userspace. 2316 */ 2317 int 2318 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2319 struct drm_file *file) 2320 { 2321 struct drm_i915_gem_mmap_gtt *args = data; 2322 2323 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2324 } 2325 2326 /* Immediately discard the backing storage */ 2327 static void 2328 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2329 { 2330 i915_gem_object_free_mmap_offset(obj); 2331 2332 if (obj->base.filp == NULL) 2333 return; 2334 2335 /* Our goal here is to return as much of the memory as 2336 * is possible back to the system as we are called from OOM. 2337 * To do this we must instruct the shmfs to drop all of its 2338 * backing pages, *now*. 
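	 * After truncating we also mark the object as purged, so that any
	 * later attempt to bind its pages sees __I915_MADV_PURGED and fails
	 * with -EFAULT rather than silently repopulating the backing store.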
2339 */ 2340 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2341 obj->mm.madv = __I915_MADV_PURGED; 2342 obj->mm.pages = ERR_PTR(-EFAULT); 2343 } 2344 2345 /* Try to discard unwanted pages */ 2346 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2347 { 2348 struct address_space *mapping; 2349 2350 lockdep_assert_held(&obj->mm.lock); 2351 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2352 2353 switch (obj->mm.madv) { 2354 case I915_MADV_DONTNEED: 2355 i915_gem_object_truncate(obj); 2356 case __I915_MADV_PURGED: 2357 return; 2358 } 2359 2360 if (obj->base.filp == NULL) 2361 return; 2362 2363 mapping = obj->base.filp->f_mapping, 2364 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2365 } 2366 2367 static void 2368 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2369 struct sg_table *pages) 2370 { 2371 struct sgt_iter sgt_iter; 2372 struct page *page; 2373 2374 __i915_gem_object_release_shmem(obj, pages, true); 2375 2376 i915_gem_gtt_finish_pages(obj, pages); 2377 2378 if (i915_gem_object_needs_bit17_swizzle(obj)) 2379 i915_gem_object_save_bit_17_swizzle(obj, pages); 2380 2381 for_each_sgt_page(page, sgt_iter, pages) { 2382 if (obj->mm.dirty) 2383 set_page_dirty(page); 2384 2385 if (obj->mm.madv == I915_MADV_WILLNEED) 2386 mark_page_accessed(page); 2387 2388 put_page(page); 2389 } 2390 obj->mm.dirty = false; 2391 2392 sg_free_table(pages); 2393 kfree(pages); 2394 } 2395 2396 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2397 { 2398 struct radix_tree_iter iter; 2399 void __rcu **slot; 2400 2401 rcu_read_lock(); 2402 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2403 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2404 rcu_read_unlock(); 2405 } 2406 2407 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2408 enum i915_mm_subclass subclass) 2409 { 2410 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2411 struct sg_table *pages; 2412 2413 if (i915_gem_object_has_pinned_pages(obj)) 2414 return; 2415 2416 GEM_BUG_ON(obj->bind_count); 2417 if (!i915_gem_object_has_pages(obj)) 2418 return; 2419 2420 /* May be called by shrinker from within get_pages() (on another bo) */ 2421 mutex_lock_nested(&obj->mm.lock, subclass); 2422 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2423 goto unlock; 2424 2425 /* ->put_pages might need to allocate memory for the bit17 swizzle 2426 * array, hence protect them from being reaped by removing them from gtt 2427 * lists early. 
*/ 2428 pages = fetch_and_zero(&obj->mm.pages); 2429 GEM_BUG_ON(!pages); 2430 2431 spin_lock(&i915->mm.obj_lock); 2432 list_del(&obj->mm.link); 2433 spin_unlock(&i915->mm.obj_lock); 2434 2435 if (obj->mm.mapping) { 2436 void *ptr; 2437 2438 ptr = page_mask_bits(obj->mm.mapping); 2439 if (is_vmalloc_addr(ptr)) 2440 vunmap(ptr); 2441 else 2442 kunmap(kmap_to_page(ptr)); 2443 2444 obj->mm.mapping = NULL; 2445 } 2446 2447 __i915_gem_object_reset_page_iter(obj); 2448 2449 if (!IS_ERR(pages)) 2450 obj->ops->put_pages(obj, pages); 2451 2452 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2453 2454 unlock: 2455 mutex_unlock(&obj->mm.lock); 2456 } 2457 2458 static bool i915_sg_trim(struct sg_table *orig_st) 2459 { 2460 struct sg_table new_st; 2461 struct scatterlist *sg, *new_sg; 2462 unsigned int i; 2463 2464 if (orig_st->nents == orig_st->orig_nents) 2465 return false; 2466 2467 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2468 return false; 2469 2470 new_sg = new_st.sgl; 2471 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2472 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2473 /* called before being DMA mapped, no need to copy sg->dma_* */ 2474 new_sg = sg_next(new_sg); 2475 } 2476 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2477 2478 sg_free_table(orig_st); 2479 2480 *orig_st = new_st; 2481 return true; 2482 } 2483 2484 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2485 { 2486 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2487 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2488 unsigned long i; 2489 struct address_space *mapping; 2490 struct sg_table *st; 2491 struct scatterlist *sg; 2492 struct sgt_iter sgt_iter; 2493 struct page *page; 2494 unsigned long last_pfn = 0; /* suppress gcc warning */ 2495 unsigned int max_segment = i915_sg_segment_size(); 2496 unsigned int sg_page_sizes; 2497 gfp_t noreclaim; 2498 int ret; 2499 2500 /* Assert that the object is not currently in any GPU domain. As it 2501 * wasn't in the GTT, there shouldn't be any way it could have been in 2502 * a GPU cache 2503 */ 2504 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2505 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2506 2507 st = kmalloc(sizeof(*st), GFP_KERNEL); 2508 if (st == NULL) 2509 return -ENOMEM; 2510 2511 rebuild_st: 2512 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2513 kfree(st); 2514 return -ENOMEM; 2515 } 2516 2517 /* Get the list of pages out of our struct file. They'll be pinned 2518 * at this point until we release them. 2519 * 2520 * Fail silently without starting the shrinker 2521 */ 2522 mapping = obj->base.filp->f_mapping; 2523 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2524 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2525 2526 sg = st->sgl; 2527 st->nents = 0; 2528 sg_page_sizes = 0; 2529 for (i = 0; i < page_count; i++) { 2530 const unsigned int shrink[] = { 2531 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2532 0, 2533 }, *s = shrink; 2534 gfp_t gfp = noreclaim; 2535 2536 do { 2537 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2538 if (likely(!IS_ERR(page))) 2539 break; 2540 2541 if (!*s) { 2542 ret = PTR_ERR(page); 2543 goto err_sg; 2544 } 2545 2546 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2547 cond_resched(); 2548 2549 /* We've tried hard to allocate the memory by reaping 2550 * our own buffer, now let the real VM do its job and 2551 * go down in flames if truly OOM. 
2552 * 2553 * However, since graphics tend to be disposable, 2554 * defer the oom here by reporting the ENOMEM back 2555 * to userspace. 2556 */ 2557 if (!*s) { 2558 /* reclaim and warn, but no oom */ 2559 gfp = mapping_gfp_mask(mapping); 2560 2561 /* Our bo are always dirty and so we require 2562 * kswapd to reclaim our pages (direct reclaim 2563 * does not effectively begin pageout of our 2564 * buffers on its own). However, direct reclaim 2565 * only waits for kswapd when under allocation 2566 * congestion. So as a result __GFP_RECLAIM is 2567 * unreliable and fails to actually reclaim our 2568 * dirty pages -- unless you try over and over 2569 * again with !__GFP_NORETRY. However, we still 2570 * want to fail this allocation rather than 2571 * trigger the out-of-memory killer and for 2572 * this we want __GFP_RETRY_MAYFAIL. 2573 */ 2574 gfp |= __GFP_RETRY_MAYFAIL; 2575 } 2576 } while (1); 2577 2578 if (!i || 2579 sg->length >= max_segment || 2580 page_to_pfn(page) != last_pfn + 1) { 2581 if (i) { 2582 sg_page_sizes |= sg->length; 2583 sg = sg_next(sg); 2584 } 2585 st->nents++; 2586 sg_set_page(sg, page, PAGE_SIZE, 0); 2587 } else { 2588 sg->length += PAGE_SIZE; 2589 } 2590 last_pfn = page_to_pfn(page); 2591 2592 /* Check that the i965g/gm workaround works. */ 2593 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2594 } 2595 if (sg) { /* loop terminated early; short sg table */ 2596 sg_page_sizes |= sg->length; 2597 sg_mark_end(sg); 2598 } 2599 2600 /* Trim unused sg entries to avoid wasting memory. */ 2601 i915_sg_trim(st); 2602 2603 ret = i915_gem_gtt_prepare_pages(obj, st); 2604 if (ret) { 2605 /* DMA remapping failed? One possible cause is that 2606 * it could not reserve enough large entries, asking 2607 * for PAGE_SIZE chunks instead may be helpful. 2608 */ 2609 if (max_segment > PAGE_SIZE) { 2610 for_each_sgt_page(page, sgt_iter, st) 2611 put_page(page); 2612 sg_free_table(st); 2613 2614 max_segment = PAGE_SIZE; 2615 goto rebuild_st; 2616 } else { 2617 dev_warn(&dev_priv->drm.pdev->dev, 2618 "Failed to DMA remap %lu pages\n", 2619 page_count); 2620 goto err_pages; 2621 } 2622 } 2623 2624 if (i915_gem_object_needs_bit17_swizzle(obj)) 2625 i915_gem_object_do_bit_17_swizzle(obj, st); 2626 2627 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2628 2629 return 0; 2630 2631 err_sg: 2632 sg_mark_end(sg); 2633 err_pages: 2634 for_each_sgt_page(page, sgt_iter, st) 2635 put_page(page); 2636 sg_free_table(st); 2637 kfree(st); 2638 2639 /* shmemfs first checks if there is enough memory to allocate the page 2640 * and reports ENOSPC should there be insufficient, along with the usual 2641 * ENOMEM for a genuine allocation failure. 2642 * 2643 * We use ENOSPC in our driver to mean that we have run out of aperture 2644 * space and so want to translate the error from shmemfs back to our 2645 * usual understanding of ENOMEM. 
2646 */ 2647 if (ret == -ENOSPC) 2648 ret = -ENOMEM; 2649 2650 return ret; 2651 } 2652 2653 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2654 struct sg_table *pages, 2655 unsigned int sg_page_sizes) 2656 { 2657 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2658 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2659 int i; 2660 2661 lockdep_assert_held(&obj->mm.lock); 2662 2663 obj->mm.get_page.sg_pos = pages->sgl; 2664 obj->mm.get_page.sg_idx = 0; 2665 2666 obj->mm.pages = pages; 2667 2668 if (i915_gem_object_is_tiled(obj) && 2669 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2670 GEM_BUG_ON(obj->mm.quirked); 2671 __i915_gem_object_pin_pages(obj); 2672 obj->mm.quirked = true; 2673 } 2674 2675 GEM_BUG_ON(!sg_page_sizes); 2676 obj->mm.page_sizes.phys = sg_page_sizes; 2677 2678 /* 2679 * Calculate the supported page-sizes which fit into the given 2680 * sg_page_sizes. This will give us the page-sizes which we may be able 2681 * to use opportunistically when later inserting into the GTT. For 2682 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2683 * 64K or 4K pages, although in practice this will depend on a number of 2684 * other factors. 2685 */ 2686 obj->mm.page_sizes.sg = 0; 2687 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2688 if (obj->mm.page_sizes.phys & ~0u << i) 2689 obj->mm.page_sizes.sg |= BIT(i); 2690 } 2691 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2692 2693 spin_lock(&i915->mm.obj_lock); 2694 list_add(&obj->mm.link, &i915->mm.unbound_list); 2695 spin_unlock(&i915->mm.obj_lock); 2696 } 2697 2698 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2699 { 2700 int err; 2701 2702 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2703 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2704 return -EFAULT; 2705 } 2706 2707 err = obj->ops->get_pages(obj); 2708 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2709 2710 return err; 2711 } 2712 2713 /* Ensure that the associated pages are gathered from the backing storage 2714 * and pinned into our object. i915_gem_object_pin_pages() may be called 2715 * multiple times before they are released by a single call to 2716 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2717 * either as a result of memory pressure (reaping pages under the shrinker) 2718 * or as the object is itself released. 
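 *
 * A minimal usage sketch (illustrative only; the callers in this file are
 * the authoritative examples):
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access obj->mm.pages / obj->mm.get_page ...
 *
 *	i915_gem_object_unpin_pages(obj);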
2719 */ 2720 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2721 { 2722 int err; 2723 2724 err = mutex_lock_interruptible(&obj->mm.lock); 2725 if (err) 2726 return err; 2727 2728 if (unlikely(!i915_gem_object_has_pages(obj))) { 2729 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2730 2731 err = ____i915_gem_object_get_pages(obj); 2732 if (err) 2733 goto unlock; 2734 2735 smp_mb__before_atomic(); 2736 } 2737 atomic_inc(&obj->mm.pages_pin_count); 2738 2739 unlock: 2740 mutex_unlock(&obj->mm.lock); 2741 return err; 2742 } 2743 2744 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2745 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2746 enum i915_map_type type) 2747 { 2748 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2749 struct sg_table *sgt = obj->mm.pages; 2750 struct sgt_iter sgt_iter; 2751 struct page *page; 2752 struct page *stack_pages[32]; 2753 struct page **pages = stack_pages; 2754 unsigned long i = 0; 2755 pgprot_t pgprot; 2756 void *addr; 2757 2758 /* A single page can always be kmapped */ 2759 if (n_pages == 1 && type == I915_MAP_WB) 2760 return kmap(sg_page(sgt->sgl)); 2761 2762 if (n_pages > ARRAY_SIZE(stack_pages)) { 2763 /* Too big for stack -- allocate temporary array instead */ 2764 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2765 if (!pages) 2766 return NULL; 2767 } 2768 2769 for_each_sgt_page(page, sgt_iter, sgt) 2770 pages[i++] = page; 2771 2772 /* Check that we have the expected number of pages */ 2773 GEM_BUG_ON(i != n_pages); 2774 2775 switch (type) { 2776 default: 2777 MISSING_CASE(type); 2778 /* fallthrough to use PAGE_KERNEL anyway */ 2779 case I915_MAP_WB: 2780 pgprot = PAGE_KERNEL; 2781 break; 2782 case I915_MAP_WC: 2783 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2784 break; 2785 } 2786 addr = vmap(pages, n_pages, 0, pgprot); 2787 2788 if (pages != stack_pages) 2789 kvfree(pages); 2790 2791 return addr; 2792 } 2793 2794 /* get, pin, and map the pages of the object into kernel space */ 2795 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2796 enum i915_map_type type) 2797 { 2798 enum i915_map_type has_type; 2799 bool pinned; 2800 void *ptr; 2801 int ret; 2802 2803 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2804 return ERR_PTR(-ENXIO); 2805 2806 ret = mutex_lock_interruptible(&obj->mm.lock); 2807 if (ret) 2808 return ERR_PTR(ret); 2809 2810 pinned = !(type & I915_MAP_OVERRIDE); 2811 type &= ~I915_MAP_OVERRIDE; 2812 2813 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2814 if (unlikely(!i915_gem_object_has_pages(obj))) { 2815 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2816 2817 ret = ____i915_gem_object_get_pages(obj); 2818 if (ret) 2819 goto err_unlock; 2820 2821 smp_mb__before_atomic(); 2822 } 2823 atomic_inc(&obj->mm.pages_pin_count); 2824 pinned = false; 2825 } 2826 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2827 2828 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2829 if (ptr && has_type != type) { 2830 if (pinned) { 2831 ret = -EBUSY; 2832 goto err_unpin; 2833 } 2834 2835 if (is_vmalloc_addr(ptr)) 2836 vunmap(ptr); 2837 else 2838 kunmap(kmap_to_page(ptr)); 2839 2840 ptr = obj->mm.mapping = NULL; 2841 } 2842 2843 if (!ptr) { 2844 ptr = i915_gem_object_map(obj, type); 2845 if (!ptr) { 2846 ret = -ENOMEM; 2847 goto err_unpin; 2848 } 2849 2850 obj->mm.mapping = page_pack_bits(ptr, type); 2851 } 2852 2853 out_unlock: 2854 mutex_unlock(&obj->mm.lock); 2855 return ptr; 2856 2857 err_unpin: 2858 atomic_dec(&obj->mm.pages_pin_count); 2859 
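	/* Unwind: report the error to the caller as an ERR_PTR and release
	 * obj->mm.lock via the common exit path above.
	 */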
err_unlock: 2860 ptr = ERR_PTR(ret); 2861 goto out_unlock; 2862 } 2863 2864 static int 2865 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2866 const struct drm_i915_gem_pwrite *arg) 2867 { 2868 struct address_space *mapping = obj->base.filp->f_mapping; 2869 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2870 u64 remain, offset; 2871 unsigned int pg; 2872 2873 /* Before we instantiate/pin the backing store for our use, we 2874 * can prepopulate the shmemfs filp efficiently using a write into 2875 * the pagecache. We avoid the penalty of instantiating all the 2876 * pages, important if the user is just writing to a few and never 2877 * uses the object on the GPU, and using a direct write into shmemfs 2878 * allows it to avoid the cost of retrieving a page (either swapin 2879 * or clearing-before-use) before it is overwritten. 2880 */ 2881 if (i915_gem_object_has_pages(obj)) 2882 return -ENODEV; 2883 2884 if (obj->mm.madv != I915_MADV_WILLNEED) 2885 return -EFAULT; 2886 2887 /* Before the pages are instantiated the object is treated as being 2888 * in the CPU domain. The pages will be clflushed as required before 2889 * use, and we can freely write into the pages directly. If userspace 2890 * races pwrite with any other operation; corruption will ensue - 2891 * that is userspace's prerogative! 2892 */ 2893 2894 remain = arg->size; 2895 offset = arg->offset; 2896 pg = offset_in_page(offset); 2897 2898 do { 2899 unsigned int len, unwritten; 2900 struct page *page; 2901 void *data, *vaddr; 2902 int err; 2903 2904 len = PAGE_SIZE - pg; 2905 if (len > remain) 2906 len = remain; 2907 2908 err = pagecache_write_begin(obj->base.filp, mapping, 2909 offset, len, 0, 2910 &page, &data); 2911 if (err < 0) 2912 return err; 2913 2914 vaddr = kmap(page); 2915 unwritten = copy_from_user(vaddr + pg, user_data, len); 2916 kunmap(page); 2917 2918 err = pagecache_write_end(obj->base.filp, mapping, 2919 offset, len, len - unwritten, 2920 page, data); 2921 if (err < 0) 2922 return err; 2923 2924 if (unwritten) 2925 return -EFAULT; 2926 2927 remain -= len; 2928 user_data += len; 2929 offset += len; 2930 pg = 0; 2931 } while (remain); 2932 2933 return 0; 2934 } 2935 2936 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 2937 const struct i915_gem_context *ctx) 2938 { 2939 unsigned int score; 2940 unsigned long prev_hang; 2941 2942 if (i915_gem_context_is_banned(ctx)) 2943 score = I915_CLIENT_SCORE_CONTEXT_BAN; 2944 else 2945 score = 0; 2946 2947 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 2948 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 2949 score += I915_CLIENT_SCORE_HANG_FAST; 2950 2951 if (score) { 2952 atomic_add(score, &file_priv->ban_score); 2953 2954 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 2955 ctx->name, score, 2956 atomic_read(&file_priv->ban_score)); 2957 } 2958 } 2959 2960 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2961 { 2962 unsigned int score; 2963 bool banned, bannable; 2964 2965 atomic_inc(&ctx->guilty_count); 2966 2967 bannable = i915_gem_context_is_bannable(ctx); 2968 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 2969 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 2970 2971 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n", 2972 ctx->name, atomic_read(&ctx->guilty_count), 2973 score, yesno(banned && bannable)); 2974 2975 /* Cool contexts don't accumulate client ban score */ 2976 if (!bannable) 2977 return; 2978 2979 if (banned) 
2980 i915_gem_context_set_banned(ctx); 2981 2982 if (!IS_ERR_OR_NULL(ctx->file_priv)) 2983 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 2984 } 2985 2986 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2987 { 2988 atomic_inc(&ctx->active_count); 2989 } 2990 2991 struct i915_request * 2992 i915_gem_find_active_request(struct intel_engine_cs *engine) 2993 { 2994 struct i915_request *request, *active = NULL; 2995 unsigned long flags; 2996 2997 /* 2998 * We are called by the error capture, reset and to dump engine 2999 * state at random points in time. In particular, note that neither is 3000 * crucially ordered with an interrupt. After a hang, the GPU is dead 3001 * and we assume that no more writes can happen (we waited long enough 3002 * for all writes that were in transaction to be flushed) - adding an 3003 * extra delay for a recent interrupt is pointless. Hence, we do 3004 * not need an engine->irq_seqno_barrier() before the seqno reads. 3005 * At all other times, we must assume the GPU is still running, but 3006 * we only care about the snapshot of this moment. 3007 */ 3008 spin_lock_irqsave(&engine->timeline.lock, flags); 3009 list_for_each_entry(request, &engine->timeline.requests, link) { 3010 if (__i915_request_completed(request, request->global_seqno)) 3011 continue; 3012 3013 active = request; 3014 break; 3015 } 3016 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3017 3018 return active; 3019 } 3020 3021 /* 3022 * Ensure irq handler finishes, and not run again. 3023 * Also return the active request so that we only search for it once. 3024 */ 3025 struct i915_request * 3026 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3027 { 3028 struct i915_request *request = NULL; 3029 3030 /* 3031 * During the reset sequence, we must prevent the engine from 3032 * entering RC6. As the context state is undefined until we restart 3033 * the engine, if it does enter RC6 during the reset, the state 3034 * written to the powercontext is undefined and so we may lose 3035 * GPU state upon resume, i.e. fail to restart after a reset. 3036 */ 3037 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3038 3039 /* 3040 * Prevent the signaler thread from updating the request 3041 * state (by calling dma_fence_signal) as we are processing 3042 * the reset. The write from the GPU of the seqno is 3043 * asynchronous and the signaler thread may see a different 3044 * value to us and declare the request complete, even though 3045 * the reset routine have picked that request as the active 3046 * (incomplete) request. This conflict is not handled 3047 * gracefully! 3048 */ 3049 kthread_park(engine->breadcrumbs.signaler); 3050 3051 /* 3052 * Prevent request submission to the hardware until we have 3053 * completed the reset in i915_gem_reset_finish(). If a request 3054 * is completed by one engine, it may then queue a request 3055 * to a second via its execlists->tasklet *just* as we are 3056 * calling engine->init_hw() and also writing the ELSP. 3057 * Turning off the execlists->tasklet until the reset is over 3058 * prevents the race. 3059 * 3060 * Note that this needs to be a single atomic operation on the 3061 * tasklet (flush existing tasks, prevent new tasks) to prevent 3062 * a race between reset and set-wedged. It is not, so we do the best 3063 * we can atm and make sure we don't lock the machine up in the more 3064 * common case of recursively being called from set-wedged from inside 3065 * i915_reset. 
3066 */ 3067 if (!atomic_read(&engine->execlists.tasklet.count)) 3068 tasklet_kill(&engine->execlists.tasklet); 3069 tasklet_disable(&engine->execlists.tasklet); 3070 3071 /* 3072 * We're using worker to queue preemption requests from the tasklet in 3073 * GuC submission mode. 3074 * Even though tasklet was disabled, we may still have a worker queued. 3075 * Let's make sure that all workers scheduled before disabling the 3076 * tasklet are completed before continuing with the reset. 3077 */ 3078 if (engine->i915->guc.preempt_wq) 3079 flush_workqueue(engine->i915->guc.preempt_wq); 3080 3081 if (engine->irq_seqno_barrier) 3082 engine->irq_seqno_barrier(engine); 3083 3084 request = i915_gem_find_active_request(engine); 3085 if (request && request->fence.error == -EIO) 3086 request = ERR_PTR(-EIO); /* Previous reset failed! */ 3087 3088 return request; 3089 } 3090 3091 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3092 { 3093 struct intel_engine_cs *engine; 3094 struct i915_request *request; 3095 enum intel_engine_id id; 3096 int err = 0; 3097 3098 for_each_engine(engine, dev_priv, id) { 3099 request = i915_gem_reset_prepare_engine(engine); 3100 if (IS_ERR(request)) { 3101 err = PTR_ERR(request); 3102 continue; 3103 } 3104 3105 engine->hangcheck.active_request = request; 3106 } 3107 3108 i915_gem_revoke_fences(dev_priv); 3109 intel_uc_sanitize(dev_priv); 3110 3111 return err; 3112 } 3113 3114 static void skip_request(struct i915_request *request) 3115 { 3116 void *vaddr = request->ring->vaddr; 3117 u32 head; 3118 3119 /* As this request likely depends on state from the lost 3120 * context, clear out all the user operations leaving the 3121 * breadcrumb at the end (so we get the fence notifications). 3122 */ 3123 head = request->head; 3124 if (request->postfix < head) { 3125 memset(vaddr + head, 0, request->ring->size - head); 3126 head = 0; 3127 } 3128 memset(vaddr + head, 0, request->postfix - head); 3129 3130 dma_fence_set_error(&request->fence, -EIO); 3131 } 3132 3133 static void engine_skip_context(struct i915_request *request) 3134 { 3135 struct intel_engine_cs *engine = request->engine; 3136 struct i915_gem_context *hung_ctx = request->ctx; 3137 struct i915_timeline *timeline = request->timeline; 3138 unsigned long flags; 3139 3140 GEM_BUG_ON(timeline == &engine->timeline); 3141 3142 spin_lock_irqsave(&engine->timeline.lock, flags); 3143 spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); 3144 3145 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3146 if (request->ctx == hung_ctx) 3147 skip_request(request); 3148 3149 list_for_each_entry(request, &timeline->requests, link) 3150 skip_request(request); 3151 3152 spin_unlock(&timeline->lock); 3153 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3154 } 3155 3156 /* Returns the request if it was guilty of the hang */ 3157 static struct i915_request * 3158 i915_gem_reset_request(struct intel_engine_cs *engine, 3159 struct i915_request *request, 3160 bool stalled) 3161 { 3162 /* The guilty request will get skipped on a hung engine. 3163 * 3164 * Users of client default contexts do not rely on logical 3165 * state preserved between batches so it is safe to execute 3166 * queued requests following the hang. Non default contexts 3167 * rely on preserved state, so skipping a batch loses the 3168 * evolution of the state and it needs to be considered corrupted. 3169 * Executing more queued batches on top of corrupted state is 3170 * risky. 
But we take the risk by trying to advance through 3171 * the queued requests in order to make the client behaviour 3172 * more predictable around resets, by not throwing away a random 3173 * amount of batches it has prepared for execution. Sophisticated 3174 * clients can use gem_reset_stats_ioctl and dma fence status 3175 * (exported via sync_file info ioctl on explicit fences) to observe 3176 * when they lose the context state and should rebuild accordingly. 3177 * 3178 * The context ban, and ultimately the client ban, mechanisms are safety 3179 * valves if client submission ends up resulting in nothing more than 3180 * subsequent hangs. 3181 */ 3182 3183 if (i915_request_completed(request)) { 3184 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3185 engine->name, request->global_seqno, 3186 request->fence.context, request->fence.seqno, 3187 intel_engine_get_seqno(engine)); 3188 stalled = false; 3189 } 3190 3191 if (stalled) { 3192 i915_gem_context_mark_guilty(request->ctx); 3193 skip_request(request); 3194 3195 /* If this context is now banned, skip all pending requests. */ 3196 if (i915_gem_context_is_banned(request->ctx)) 3197 engine_skip_context(request); 3198 } else { 3199 /* 3200 * Since this is not the hung engine, it may have advanced 3201 * since the hang declaration. Double check by refinding 3202 * the active request at the time of the reset. 3203 */ 3204 request = i915_gem_find_active_request(engine); 3205 if (request) { 3206 i915_gem_context_mark_innocent(request->ctx); 3207 dma_fence_set_error(&request->fence, -EAGAIN); 3208 3209 /* Rewind the engine to replay the incomplete rq */ 3210 spin_lock_irq(&engine->timeline.lock); 3211 request = list_prev_entry(request, link); 3212 if (&request->link == &engine->timeline.requests) 3213 request = NULL; 3214 spin_unlock_irq(&engine->timeline.lock); 3215 } 3216 } 3217 3218 return request; 3219 } 3220 3221 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3222 struct i915_request *request, 3223 bool stalled) 3224 { 3225 /* 3226 * Make sure this write is visible before we re-enable the interrupt 3227 * handlers on another CPU, as tasklet_enable() resolves to just 3228 * a compiler barrier which is insufficient for our purpose here. 3229 */ 3230 smp_store_mb(engine->irq_posted, 0); 3231 3232 if (request) 3233 request = i915_gem_reset_request(engine, request, stalled); 3234 3235 if (request) { 3236 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3237 engine->name, request->global_seqno); 3238 } 3239 3240 /* Set up the CS to resume from the breadcrumb of the hung request */ 3241 engine->reset_hw(engine, request); 3242 } 3243 3244 void i915_gem_reset(struct drm_i915_private *dev_priv, 3245 unsigned int stalled_mask) 3246 { 3247 struct intel_engine_cs *engine; 3248 enum intel_engine_id id; 3249 3250 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3251 3252 i915_retire_requests(dev_priv); 3253 3254 for_each_engine(engine, dev_priv, id) { 3255 struct i915_gem_context *ctx; 3256 3257 i915_gem_reset_engine(engine, 3258 engine->hangcheck.active_request, 3259 stalled_mask & ENGINE_MASK(id)); 3260 ctx = fetch_and_zero(&engine->last_retired_context); 3261 if (ctx) 3262 intel_context_unpin(ctx, engine); 3263 3264 /* 3265 * Ostensibly, we always want a context loaded for powersaving, 3266 * so if the engine is idle after the reset, send a request 3267 * to load our scratch kernel_context.
3268 * 3269 * More mysteriously, if we leave the engine idle after a reset, 3270 * the next userspace batch may hang, with what appears to be 3271 * an incoherent read by the CS (presumably stale TLB). An 3272 * empty request appears sufficient to paper over the glitch. 3273 */ 3274 if (intel_engine_is_idle(engine)) { 3275 struct i915_request *rq; 3276 3277 rq = i915_request_alloc(engine, 3278 dev_priv->kernel_context); 3279 if (!IS_ERR(rq)) 3280 __i915_request_add(rq, false); 3281 } 3282 } 3283 3284 i915_gem_restore_fences(dev_priv); 3285 } 3286 3287 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3288 { 3289 tasklet_enable(&engine->execlists.tasklet); 3290 kthread_unpark(engine->breadcrumbs.signaler); 3291 3292 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3293 } 3294 3295 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3296 { 3297 struct intel_engine_cs *engine; 3298 enum intel_engine_id id; 3299 3300 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3301 3302 for_each_engine(engine, dev_priv, id) { 3303 engine->hangcheck.active_request = NULL; 3304 i915_gem_reset_finish_engine(engine); 3305 } 3306 } 3307 3308 static void nop_submit_request(struct i915_request *request) 3309 { 3310 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3311 request->engine->name, 3312 request->fence.context, request->fence.seqno); 3313 dma_fence_set_error(&request->fence, -EIO); 3314 3315 i915_request_submit(request); 3316 } 3317 3318 static void nop_complete_submit_request(struct i915_request *request) 3319 { 3320 unsigned long flags; 3321 3322 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3323 request->engine->name, 3324 request->fence.context, request->fence.seqno); 3325 dma_fence_set_error(&request->fence, -EIO); 3326 3327 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3328 __i915_request_submit(request); 3329 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3330 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3331 } 3332 3333 void i915_gem_set_wedged(struct drm_i915_private *i915) 3334 { 3335 struct intel_engine_cs *engine; 3336 enum intel_engine_id id; 3337 3338 GEM_TRACE("start\n"); 3339 3340 if (GEM_SHOW_DEBUG()) { 3341 struct drm_printer p = drm_debug_printer(__func__); 3342 3343 for_each_engine(engine, i915, id) 3344 intel_engine_dump(engine, &p, "%s\n", engine->name); 3345 } 3346 3347 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3348 smp_mb__after_atomic(); 3349 3350 /* 3351 * First, stop submission to hw, but do not yet complete requests by 3352 * rolling the global seqno forward (since this would complete requests 3353 * for which we haven't set the fence error to EIO yet). 3354 */ 3355 for_each_engine(engine, i915, id) { 3356 i915_gem_reset_prepare_engine(engine); 3357 3358 engine->submit_request = nop_submit_request; 3359 engine->schedule = NULL; 3360 } 3361 i915->caps.scheduler = 0; 3362 3363 /* Even if the GPU reset fails, it should still stop the engines */ 3364 intel_gpu_reset(i915, ALL_ENGINES); 3365 3366 /* 3367 * Make sure no one is running the old callback before we proceed with 3368 * cancelling requests and resetting the completion tracking. Otherwise 3369 * we might submit a request to the hardware which never completes. 3370 */ 3371 synchronize_rcu(); 3372 3373 for_each_engine(engine, i915, id) { 3374 /* Mark all executing requests as skipped */ 3375 engine->cancel_requests(engine); 3376 3377 /* 3378 * Only once we've force-cancelled all in-flight requests can we 3379 * start to complete all requests. 
3380 */ 3381 engine->submit_request = nop_complete_submit_request; 3382 } 3383 3384 /* 3385 * Make sure no request can slip through without getting completed by 3386 * either this call here to intel_engine_init_global_seqno, or the one 3387 * in nop_complete_submit_request. 3388 */ 3389 synchronize_rcu(); 3390 3391 for_each_engine(engine, i915, id) { 3392 unsigned long flags; 3393 3394 /* 3395 * Mark all pending requests as complete so that any concurrent 3396 * (lockless) lookup doesn't try and wait upon the request as we 3397 * reset it. 3398 */ 3399 spin_lock_irqsave(&engine->timeline.lock, flags); 3400 intel_engine_init_global_seqno(engine, 3401 intel_engine_last_submit(engine)); 3402 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3403 3404 i915_gem_reset_finish_engine(engine); 3405 } 3406 3407 GEM_TRACE("end\n"); 3408 3409 wake_up_all(&i915->gpu_error.reset_queue); 3410 } 3411 3412 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3413 { 3414 struct i915_timeline *tl; 3415 3416 lockdep_assert_held(&i915->drm.struct_mutex); 3417 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3418 return true; 3419 3420 GEM_TRACE("start\n"); 3421 3422 /* 3423 * Before unwedging, make sure that all pending operations 3424 * are flushed and errored out - we may have requests waiting upon 3425 * third party fences. We marked all inflight requests as EIO, and 3426 * every execbuf since returned EIO, for consistency we want all 3427 * the currently pending requests to also be marked as EIO, which 3428 * is done inside our nop_submit_request - and so we must wait. 3429 * 3430 * No more can be submitted until we reset the wedged bit. 3431 */ 3432 list_for_each_entry(tl, &i915->gt.timelines, link) { 3433 struct i915_request *rq; 3434 3435 rq = i915_gem_active_peek(&tl->last_request, 3436 &i915->drm.struct_mutex); 3437 if (!rq) 3438 continue; 3439 3440 /* 3441 * We can't use our normal waiter as we want to 3442 * avoid recursively trying to handle the current 3443 * reset. The basic dma_fence_default_wait() installs 3444 * a callback for dma_fence_signal(), which is 3445 * triggered by our nop handler (indirectly, the 3446 * callback enables the signaler thread which is 3447 * woken by the nop_submit_request() advancing the seqno 3448 * and when the seqno passes the fence, the signaler 3449 * then signals the fence waking us up). 3450 */ 3451 if (dma_fence_default_wait(&rq->fence, true, 3452 MAX_SCHEDULE_TIMEOUT) < 0) 3453 return false; 3454 } 3455 i915_retire_requests(i915); 3456 GEM_BUG_ON(i915->gt.active_requests); 3457 3458 /* 3459 * Undo nop_submit_request. We prevent all new i915 requests from 3460 * being queued (by disallowing execbuf whilst wedged) so having 3461 * waited for all active requests above, we know the system is idle 3462 * and do not have to worry about a thread being inside 3463 * engine->submit_request() as we swap over. So unlike installing 3464 * the nop_submit_request on reset, we can do this from normal 3465 * context and do not require stop_machine(). 
3466 */ 3467 intel_engines_reset_default_submission(i915); 3468 i915_gem_contexts_lost(i915); 3469 3470 GEM_TRACE("end\n"); 3471 3472 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3473 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3474 3475 return true; 3476 } 3477 3478 static void 3479 i915_gem_retire_work_handler(struct work_struct *work) 3480 { 3481 struct drm_i915_private *dev_priv = 3482 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3483 struct drm_device *dev = &dev_priv->drm; 3484 3485 /* Come back later if the device is busy... */ 3486 if (mutex_trylock(&dev->struct_mutex)) { 3487 i915_retire_requests(dev_priv); 3488 mutex_unlock(&dev->struct_mutex); 3489 } 3490 3491 /* 3492 * Keep the retire handler running until we are finally idle. 3493 * We do not need to do this test under locking as in the worst-case 3494 * we queue the retire worker once too often. 3495 */ 3496 if (READ_ONCE(dev_priv->gt.awake)) 3497 queue_delayed_work(dev_priv->wq, 3498 &dev_priv->gt.retire_work, 3499 round_jiffies_up_relative(HZ)); 3500 } 3501 3502 static void shrink_caches(struct drm_i915_private *i915) 3503 { 3504 /* 3505 * kmem_cache_shrink() discards empty slabs and reorders partially 3506 * filled slabs to prioritise allocating from the mostly full slabs, 3507 * with the aim of reducing fragmentation. 3508 */ 3509 kmem_cache_shrink(i915->priorities); 3510 kmem_cache_shrink(i915->dependencies); 3511 kmem_cache_shrink(i915->requests); 3512 kmem_cache_shrink(i915->luts); 3513 kmem_cache_shrink(i915->vmas); 3514 kmem_cache_shrink(i915->objects); 3515 } 3516 3517 struct sleep_rcu_work { 3518 union { 3519 struct rcu_head rcu; 3520 struct work_struct work; 3521 }; 3522 struct drm_i915_private *i915; 3523 unsigned int epoch; 3524 }; 3525 3526 static inline bool 3527 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3528 { 3529 /* 3530 * There is a small chance that the epoch wrapped since we started 3531 * sleeping. If we assume that epoch is at least a u32, then it will 3532 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3533 */ 3534 return epoch == READ_ONCE(i915->gt.epoch); 3535 } 3536 3537 static void __sleep_work(struct work_struct *work) 3538 { 3539 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3540 struct drm_i915_private *i915 = s->i915; 3541 unsigned int epoch = s->epoch; 3542 3543 kfree(s); 3544 if (same_epoch(i915, epoch)) 3545 shrink_caches(i915); 3546 } 3547 3548 static void __sleep_rcu(struct rcu_head *rcu) 3549 { 3550 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3551 struct drm_i915_private *i915 = s->i915; 3552 3553 if (same_epoch(i915, s->epoch)) { 3554 INIT_WORK(&s->work, __sleep_work); 3555 queue_work(i915->wq, &s->work); 3556 } else { 3557 kfree(s); 3558 } 3559 } 3560 3561 static inline bool 3562 new_requests_since_last_retire(const struct drm_i915_private *i915) 3563 { 3564 return (READ_ONCE(i915->gt.active_requests) || 3565 work_pending(&i915->gt.idle_work.work)); 3566 } 3567 3568 static void 3569 i915_gem_idle_work_handler(struct work_struct *work) 3570 { 3571 struct drm_i915_private *dev_priv = 3572 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3573 unsigned int epoch = I915_EPOCH_INVALID; 3574 bool rearm_hangcheck; 3575 3576 if (!READ_ONCE(dev_priv->gt.awake)) 3577 return; 3578 3579 /* 3580 * Wait for last execlists context complete, but bail out in case a 3581 * new request is submitted. As we don't trust the hardware, we 3582 * continue on if the wait times out. 
This is necessary to allow 3583 * the machine to suspend even if the hardware dies, and we will 3584 * try to recover in resume (after depriving the hardware of power, 3585 * it may be in a better mmod). 3586 */ 3587 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3588 intel_engines_are_idle(dev_priv), 3589 I915_IDLE_ENGINES_TIMEOUT * 1000, 3590 10, 500); 3591 3592 rearm_hangcheck = 3593 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3594 3595 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3596 /* Currently busy, come back later */ 3597 mod_delayed_work(dev_priv->wq, 3598 &dev_priv->gt.idle_work, 3599 msecs_to_jiffies(50)); 3600 goto out_rearm; 3601 } 3602 3603 /* 3604 * New request retired after this work handler started, extend active 3605 * period until next instance of the work. 3606 */ 3607 if (new_requests_since_last_retire(dev_priv)) 3608 goto out_unlock; 3609 3610 epoch = __i915_gem_park(dev_priv); 3611 3612 rearm_hangcheck = false; 3613 out_unlock: 3614 mutex_unlock(&dev_priv->drm.struct_mutex); 3615 3616 out_rearm: 3617 if (rearm_hangcheck) { 3618 GEM_BUG_ON(!dev_priv->gt.awake); 3619 i915_queue_hangcheck(dev_priv); 3620 } 3621 3622 /* 3623 * When we are idle, it is an opportune time to reap our caches. 3624 * However, we have many objects that utilise RCU and the ordered 3625 * i915->wq that this work is executing on. To try and flush any 3626 * pending frees now we are idle, we first wait for an RCU grace 3627 * period, and then queue a task (that will run last on the wq) to 3628 * shrink and re-optimize the caches. 3629 */ 3630 if (same_epoch(dev_priv, epoch)) { 3631 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3632 if (s) { 3633 s->i915 = dev_priv; 3634 s->epoch = epoch; 3635 call_rcu(&s->rcu, __sleep_rcu); 3636 } 3637 } 3638 } 3639 3640 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3641 { 3642 struct drm_i915_private *i915 = to_i915(gem->dev); 3643 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3644 struct drm_i915_file_private *fpriv = file->driver_priv; 3645 struct i915_lut_handle *lut, *ln; 3646 3647 mutex_lock(&i915->drm.struct_mutex); 3648 3649 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3650 struct i915_gem_context *ctx = lut->ctx; 3651 struct i915_vma *vma; 3652 3653 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3654 if (ctx->file_priv != fpriv) 3655 continue; 3656 3657 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3658 GEM_BUG_ON(vma->obj != obj); 3659 3660 /* We allow the process to have multiple handles to the same 3661 * vma, in the same fd namespace, by virtue of flink/open. 3662 */ 3663 GEM_BUG_ON(!vma->open_count); 3664 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3665 i915_vma_close(vma); 3666 3667 list_del(&lut->obj_link); 3668 list_del(&lut->ctx_link); 3669 3670 kmem_cache_free(i915->luts, lut); 3671 __i915_gem_object_release_unless_active(obj); 3672 } 3673 3674 mutex_unlock(&i915->drm.struct_mutex); 3675 } 3676 3677 static unsigned long to_wait_timeout(s64 timeout_ns) 3678 { 3679 if (timeout_ns < 0) 3680 return MAX_SCHEDULE_TIMEOUT; 3681 3682 if (timeout_ns == 0) 3683 return 0; 3684 3685 return nsecs_to_jiffies_timeout(timeout_ns); 3686 } 3687 3688 /** 3689 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3690 * @dev: drm device pointer 3691 * @data: ioctl data blob 3692 * @file: drm file pointer 3693 * 3694 * Returns 0 if successful, else an error is returned with the remaining time in 3695 * the timeout parameter. 
3696 * -ETIME: object is still busy after timeout 3697 * -ERESTARTSYS: signal interrupted the wait 3698 * -ENOENT: object doesn't exist 3699 * Also possible, but rare: 3700 * -EAGAIN: incomplete, restart syscall 3701 * -ENOMEM: damn 3702 * -ENODEV: Internal IRQ fail 3703 * -E?: The add request failed 3704 * 3705 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3706 * non-zero timeout parameter the wait ioctl will wait for the given number of 3707 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3708 * without holding struct_mutex the object may become re-busied before this 3709 * function completes. A similar but shorter race condition exists in the busy 3710 * ioctl. 3711 */ 3712 int 3713 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3714 { 3715 struct drm_i915_gem_wait *args = data; 3716 struct drm_i915_gem_object *obj; 3717 ktime_t start; 3718 long ret; 3719 3720 if (args->flags != 0) 3721 return -EINVAL; 3722 3723 obj = i915_gem_object_lookup(file, args->bo_handle); 3724 if (!obj) 3725 return -ENOENT; 3726 3727 start = ktime_get(); 3728 3729 ret = i915_gem_object_wait(obj, 3730 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3731 to_wait_timeout(args->timeout_ns), 3732 to_rps_client(file)); 3733 3734 if (args->timeout_ns > 0) { 3735 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3736 if (args->timeout_ns < 0) 3737 args->timeout_ns = 0; 3738 3739 /* 3740 * Apparently ktime isn't accurate enough and occasionally has a 3741 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3742 * things up to make the test happy. We allow up to 1 jiffy. 3743 * 3744 * This is a regression from the timespec->ktime conversion. 3745 */ 3746 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3747 args->timeout_ns = 0; 3748 3749 /* Asked to wait beyond the jiffie/scheduler precision?
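		 * If so, report -EAGAIN together with the updated timeout so
		 * that userspace can simply restart the wait.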
*/ 3750 if (ret == -ETIME && args->timeout_ns) 3751 ret = -EAGAIN; 3752 } 3753 3754 i915_gem_object_put(obj); 3755 return ret; 3756 } 3757 3758 static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) 3759 { 3760 return i915_gem_active_wait(&tl->last_request, flags); 3761 } 3762 3763 static int wait_for_engines(struct drm_i915_private *i915) 3764 { 3765 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3766 dev_err(i915->drm.dev, 3767 "Failed to idle engines, declaring wedged!\n"); 3768 GEM_TRACE_DUMP(); 3769 i915_gem_set_wedged(i915); 3770 return -EIO; 3771 } 3772 3773 return 0; 3774 } 3775 3776 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3777 { 3778 /* If the device is asleep, we have no requests outstanding */ 3779 if (!READ_ONCE(i915->gt.awake)) 3780 return 0; 3781 3782 if (flags & I915_WAIT_LOCKED) { 3783 struct i915_timeline *tl; 3784 int err; 3785 3786 lockdep_assert_held(&i915->drm.struct_mutex); 3787 3788 list_for_each_entry(tl, &i915->gt.timelines, link) { 3789 err = wait_for_timeline(tl, flags); 3790 if (err) 3791 return err; 3792 } 3793 i915_retire_requests(i915); 3794 3795 return wait_for_engines(i915); 3796 } else { 3797 struct intel_engine_cs *engine; 3798 enum intel_engine_id id; 3799 int err; 3800 3801 for_each_engine(engine, i915, id) { 3802 err = wait_for_timeline(&engine->timeline, flags); 3803 if (err) 3804 return err; 3805 } 3806 3807 return 0; 3808 } 3809 } 3810 3811 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3812 { 3813 /* 3814 * We manually flush the CPU domain so that we can override and 3815 * force the flush for the display, and perform it asyncrhonously. 3816 */ 3817 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3818 if (obj->cache_dirty) 3819 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3820 obj->write_domain = 0; 3821 } 3822 3823 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3824 { 3825 if (!READ_ONCE(obj->pin_global)) 3826 return; 3827 3828 mutex_lock(&obj->base.dev->struct_mutex); 3829 __i915_gem_object_flush_for_display(obj); 3830 mutex_unlock(&obj->base.dev->struct_mutex); 3831 } 3832 3833 /** 3834 * Moves a single object to the WC read, and possibly write domain. 3835 * @obj: object to act on 3836 * @write: ask for write access or read only 3837 * 3838 * This function returns when the move is complete, including waiting on 3839 * flushes to occur. 3840 */ 3841 int 3842 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3843 { 3844 int ret; 3845 3846 lockdep_assert_held(&obj->base.dev->struct_mutex); 3847 3848 ret = i915_gem_object_wait(obj, 3849 I915_WAIT_INTERRUPTIBLE | 3850 I915_WAIT_LOCKED | 3851 (write ? I915_WAIT_ALL : 0), 3852 MAX_SCHEDULE_TIMEOUT, 3853 NULL); 3854 if (ret) 3855 return ret; 3856 3857 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3858 return 0; 3859 3860 /* Flush and acquire obj->pages so that we are coherent through 3861 * direct access in memory with previous cached writes through 3862 * shmemfs and that our cache domain tracking remains valid. 3863 * For example, if the obj->filp was moved to swap without us 3864 * being notified and releasing the pages, we would mistakenly 3865 * continue to assume that the obj remained out of the CPU cached 3866 * domain. 
3867 */ 3868 ret = i915_gem_object_pin_pages(obj); 3869 if (ret) 3870 return ret; 3871 3872 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3873 3874 /* Serialise direct access to this object with the barriers for 3875 * coherent writes from the GPU, by effectively invalidating the 3876 * WC domain upon first access. 3877 */ 3878 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3879 mb(); 3880 3881 /* It should now be out of any other write domains, and we can update 3882 * the domain values for our changes. 3883 */ 3884 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3885 obj->read_domains |= I915_GEM_DOMAIN_WC; 3886 if (write) { 3887 obj->read_domains = I915_GEM_DOMAIN_WC; 3888 obj->write_domain = I915_GEM_DOMAIN_WC; 3889 obj->mm.dirty = true; 3890 } 3891 3892 i915_gem_object_unpin_pages(obj); 3893 return 0; 3894 } 3895 3896 /** 3897 * Moves a single object to the GTT read, and possibly write domain. 3898 * @obj: object to act on 3899 * @write: ask for write access or read only 3900 * 3901 * This function returns when the move is complete, including waiting on 3902 * flushes to occur. 3903 */ 3904 int 3905 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3906 { 3907 int ret; 3908 3909 lockdep_assert_held(&obj->base.dev->struct_mutex); 3910 3911 ret = i915_gem_object_wait(obj, 3912 I915_WAIT_INTERRUPTIBLE | 3913 I915_WAIT_LOCKED | 3914 (write ? I915_WAIT_ALL : 0), 3915 MAX_SCHEDULE_TIMEOUT, 3916 NULL); 3917 if (ret) 3918 return ret; 3919 3920 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3921 return 0; 3922 3923 /* Flush and acquire obj->pages so that we are coherent through 3924 * direct access in memory with previous cached writes through 3925 * shmemfs and that our cache domain tracking remains valid. 3926 * For example, if the obj->filp was moved to swap without us 3927 * being notified and releasing the pages, we would mistakenly 3928 * continue to assume that the obj remained out of the CPU cached 3929 * domain. 3930 */ 3931 ret = i915_gem_object_pin_pages(obj); 3932 if (ret) 3933 return ret; 3934 3935 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3936 3937 /* Serialise direct access to this object with the barriers for 3938 * coherent writes from the GPU, by effectively invalidating the 3939 * GTT domain upon first access. 3940 */ 3941 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3942 mb(); 3943 3944 /* It should now be out of any other write domains, and we can update 3945 * the domain values for our changes. 3946 */ 3947 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3948 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3949 if (write) { 3950 obj->read_domains = I915_GEM_DOMAIN_GTT; 3951 obj->write_domain = I915_GEM_DOMAIN_GTT; 3952 obj->mm.dirty = true; 3953 } 3954 3955 i915_gem_object_unpin_pages(obj); 3956 return 0; 3957 } 3958 3959 /** 3960 * Changes the cache-level of an object across all VMA. 3961 * @obj: object to act on 3962 * @cache_level: new cache level to set for the object 3963 * 3964 * After this function returns, the object will be in the new cache-level 3965 * across all GTT and the contents of the backing storage will be coherent, 3966 * with respect to the new cache-level. In order to keep the backing storage 3967 * coherent for all users, we only allow a single cache level to be set 3968 * globally on the object and prevent it from being changed whilst the 3969 * hardware is reading from the object. 
That is, if the object is currently 3970 * on the scanout it will be set to uncached (or equivalent display 3971 * cache coherency) and all non-MOCS GPU access will also be uncached so 3972 * that all direct access to the scanout remains coherent. 3973 */ 3974 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3975 enum i915_cache_level cache_level) 3976 { 3977 struct i915_vma *vma; 3978 int ret; 3979 3980 lockdep_assert_held(&obj->base.dev->struct_mutex); 3981 3982 if (obj->cache_level == cache_level) 3983 return 0; 3984 3985 /* Inspect the list of currently bound VMA and unbind any that would 3986 * be invalid given the new cache-level. This is principally to 3987 * catch the issue of the CS prefetch crossing page boundaries and 3988 * reading an invalid PTE on older architectures. 3989 */ 3990 restart: 3991 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3992 if (!drm_mm_node_allocated(&vma->node)) 3993 continue; 3994 3995 if (i915_vma_is_pinned(vma)) { 3996 DRM_DEBUG("cannot change the cache level of pinned objects\n"); 3997 return -EBUSY; 3998 } 3999 4000 if (!i915_vma_is_closed(vma) && 4001 i915_gem_valid_gtt_space(vma, cache_level)) 4002 continue; 4003 4004 ret = i915_vma_unbind(vma); 4005 if (ret) 4006 return ret; 4007 4008 /* As unbinding may affect other elements in the 4009 * obj->vma_list (due to side-effects from retiring 4010 * an active vma), play safe and restart the iterator. 4011 */ 4012 goto restart; 4013 } 4014 4015 /* We can reuse the existing drm_mm nodes but need to change the 4016 * cache-level on the PTE. We could simply unbind them all and 4017 * rebind with the correct cache-level on next use. However since 4018 * we already have a valid slot, dma mapping, pages etc, we may as well 4019 * rewrite the PTE in the belief that doing so tramples upon less 4020 * state and so involves less work. 4021 */ 4022 if (obj->bind_count) { 4023 /* Before we change the PTE, the GPU must not be accessing it. 4024 * If we wait upon the object, we know that all the bound 4025 * VMA are no longer active. 4026 */ 4027 ret = i915_gem_object_wait(obj, 4028 I915_WAIT_INTERRUPTIBLE | 4029 I915_WAIT_LOCKED | 4030 I915_WAIT_ALL, 4031 MAX_SCHEDULE_TIMEOUT, 4032 NULL); 4033 if (ret) 4034 return ret; 4035 4036 if (!HAS_LLC(to_i915(obj->base.dev)) && 4037 cache_level != I915_CACHE_NONE) { 4038 /* Access to snoopable pages through the GTT is 4039 * incoherent and on some machines causes a hard 4040 * lockup. Relinquish the CPU mmapping to force 4041 * userspace to refault in the pages and we can 4042 * then double check if the GTT mapping is still 4043 * valid for that pointer access. 4044 */ 4045 i915_gem_release_mmap(obj); 4046 4047 /* As we no longer need a fence for GTT access, 4048 * we can relinquish it now (and so prevent having 4049 * to steal a fence from someone else on the next 4050 * fence request). Note GPU activity would have 4051 * dropped the fence as all snoopable access is 4052 * supposed to be linear. 4053 */ 4054 for_each_ggtt_vma(vma, obj) { 4055 ret = i915_vma_put_fence(vma); 4056 if (ret) 4057 return ret; 4058 } 4059 } else { 4060 /* We either have incoherent backing store and 4061 * so no GTT access or the architecture is fully 4062 * coherent. In such cases, existing GTT mmaps 4063 * ignore the cache bit in the PTE and we can 4064 * rewrite it without confusing the GPU or having 4065 * to force userspace to fault back in its mmaps.
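* The rebind with PIN_UPDATE just below rewrites those PTEs in place.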
4066 */ 4067 } 4068 4069 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4070 if (!drm_mm_node_allocated(&vma->node)) 4071 continue; 4072 4073 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4074 if (ret) 4075 return ret; 4076 } 4077 } 4078 4079 list_for_each_entry(vma, &obj->vma_list, obj_link) 4080 vma->node.color = cache_level; 4081 i915_gem_object_set_cache_coherency(obj, cache_level); 4082 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4083 4084 return 0; 4085 } 4086 4087 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4088 struct drm_file *file) 4089 { 4090 struct drm_i915_gem_caching *args = data; 4091 struct drm_i915_gem_object *obj; 4092 int err = 0; 4093 4094 rcu_read_lock(); 4095 obj = i915_gem_object_lookup_rcu(file, args->handle); 4096 if (!obj) { 4097 err = -ENOENT; 4098 goto out; 4099 } 4100 4101 switch (obj->cache_level) { 4102 case I915_CACHE_LLC: 4103 case I915_CACHE_L3_LLC: 4104 args->caching = I915_CACHING_CACHED; 4105 break; 4106 4107 case I915_CACHE_WT: 4108 args->caching = I915_CACHING_DISPLAY; 4109 break; 4110 4111 default: 4112 args->caching = I915_CACHING_NONE; 4113 break; 4114 } 4115 out: 4116 rcu_read_unlock(); 4117 return err; 4118 } 4119 4120 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4121 struct drm_file *file) 4122 { 4123 struct drm_i915_private *i915 = to_i915(dev); 4124 struct drm_i915_gem_caching *args = data; 4125 struct drm_i915_gem_object *obj; 4126 enum i915_cache_level level; 4127 int ret = 0; 4128 4129 switch (args->caching) { 4130 case I915_CACHING_NONE: 4131 level = I915_CACHE_NONE; 4132 break; 4133 case I915_CACHING_CACHED: 4134 /* 4135 * Due to a HW issue on BXT A stepping, GPU stores via a 4136 * snooped mapping may leave stale data in a corresponding CPU 4137 * cacheline, whereas normally such cachelines would get 4138 * invalidated. 4139 */ 4140 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4141 return -ENODEV; 4142 4143 level = I915_CACHE_LLC; 4144 break; 4145 case I915_CACHING_DISPLAY: 4146 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4147 break; 4148 default: 4149 return -EINVAL; 4150 } 4151 4152 obj = i915_gem_object_lookup(file, args->handle); 4153 if (!obj) 4154 return -ENOENT; 4155 4156 /* 4157 * The caching mode of proxy object is handled by its generator, and 4158 * not allowed to be changed by userspace. 4159 */ 4160 if (i915_gem_object_is_proxy(obj)) { 4161 ret = -ENXIO; 4162 goto out; 4163 } 4164 4165 if (obj->cache_level == level) 4166 goto out; 4167 4168 ret = i915_gem_object_wait(obj, 4169 I915_WAIT_INTERRUPTIBLE, 4170 MAX_SCHEDULE_TIMEOUT, 4171 to_rps_client(file)); 4172 if (ret) 4173 goto out; 4174 4175 ret = i915_mutex_lock_interruptible(dev); 4176 if (ret) 4177 goto out; 4178 4179 ret = i915_gem_object_set_cache_level(obj, level); 4180 mutex_unlock(&dev->struct_mutex); 4181 4182 out: 4183 i915_gem_object_put(obj); 4184 return ret; 4185 } 4186 4187 /* 4188 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 4189 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4190 * (for pageflips). We only flush the caches while preparing the buffer for 4191 * display, the callers are responsible for frontbuffer flush. 
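* The vma pinned here must later be released with i915_gem_object_unpin_from_display_plane(), which drops the pin_global reference taken below.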
4192 */ 4193 struct i915_vma * 4194 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4195 u32 alignment, 4196 const struct i915_ggtt_view *view, 4197 unsigned int flags) 4198 { 4199 struct i915_vma *vma; 4200 int ret; 4201 4202 lockdep_assert_held(&obj->base.dev->struct_mutex); 4203 4204 /* Mark the global pin early so that we account for the 4205 * display coherency whilst setting up the cache domains. 4206 */ 4207 obj->pin_global++; 4208 4209 /* The display engine is not coherent with the LLC cache on gen6. As 4210 * a result, we make sure that the pinning that is about to occur is 4211 * done with uncached PTEs. This is lowest common denominator for all 4212 * chipsets. 4213 * 4214 * However for gen6+, we could do better by using the GFDT bit instead 4215 * of uncaching, which would allow us to flush all the LLC-cached data 4216 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4217 */ 4218 ret = i915_gem_object_set_cache_level(obj, 4219 HAS_WT(to_i915(obj->base.dev)) ? 4220 I915_CACHE_WT : I915_CACHE_NONE); 4221 if (ret) { 4222 vma = ERR_PTR(ret); 4223 goto err_unpin_global; 4224 } 4225 4226 /* As the user may map the buffer once pinned in the display plane 4227 * (e.g. libkms for the bootup splash), we have to ensure that we 4228 * always use map_and_fenceable for all scanout buffers. However, 4229 * it may simply be too big to fit into mappable, in which case 4230 * put it anyway and hope that userspace can cope (but always first 4231 * try to preserve the existing ABI). 4232 */ 4233 vma = ERR_PTR(-ENOSPC); 4234 if ((flags & PIN_MAPPABLE) == 0 && 4235 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4236 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4237 flags | 4238 PIN_MAPPABLE | 4239 PIN_NONBLOCK); 4240 if (IS_ERR(vma)) 4241 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4242 if (IS_ERR(vma)) 4243 goto err_unpin_global; 4244 4245 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4246 4247 __i915_gem_object_flush_for_display(obj); 4248 4249 /* It should now be out of any other write domains, and we can update 4250 * the domain values for our changes. 4251 */ 4252 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4253 4254 return vma; 4255 4256 err_unpin_global: 4257 obj->pin_global--; 4258 return vma; 4259 } 4260 4261 void 4262 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4263 { 4264 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4265 4266 if (WARN_ON(vma->obj->pin_global == 0)) 4267 return; 4268 4269 if (--vma->obj->pin_global == 0) 4270 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4271 4272 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4273 i915_gem_object_bump_inactive_ggtt(vma->obj); 4274 4275 i915_vma_unpin(vma); 4276 } 4277 4278 /** 4279 * Moves a single object to the CPU read, and possibly write domain. 4280 * @obj: object to act on 4281 * @write: requesting write or read-only access 4282 * 4283 * This function returns when the move is complete, including waiting on 4284 * flushes to occur. 4285 */ 4286 int 4287 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4288 { 4289 int ret; 4290 4291 lockdep_assert_held(&obj->base.dev->struct_mutex); 4292 4293 ret = i915_gem_object_wait(obj, 4294 I915_WAIT_INTERRUPTIBLE | 4295 I915_WAIT_LOCKED | 4296 (write ? 
I915_WAIT_ALL : 0), 4297 MAX_SCHEDULE_TIMEOUT, 4298 NULL); 4299 if (ret) 4300 return ret; 4301 4302 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4303 4304 /* Flush the CPU cache if it's still invalid. */ 4305 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4306 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4307 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4308 } 4309 4310 /* It should now be out of any other write domains, and we can update 4311 * the domain values for our changes. 4312 */ 4313 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4314 4315 /* If we're writing through the CPU, then the GPU read domains will 4316 * need to be invalidated at next use. 4317 */ 4318 if (write) 4319 __start_cpu_write(obj); 4320 4321 return 0; 4322 } 4323 4324 /* Throttle our rendering by waiting until the ring has completed our requests 4325 * emitted over 20 msec ago. 4326 * 4327 * Note that if we were to use the current jiffies each time around the loop, 4328 * we wouldn't escape the function with any frames outstanding if the time to 4329 * render a frame was over 20ms. 4330 * 4331 * This should get us reasonable parallelism between CPU and GPU but also 4332 * relatively low latency when blocking on a particular request to finish. 4333 */ 4334 static int 4335 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4336 { 4337 struct drm_i915_private *dev_priv = to_i915(dev); 4338 struct drm_i915_file_private *file_priv = file->driver_priv; 4339 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4340 struct i915_request *request, *target = NULL; 4341 long ret; 4342 4343 /* ABI: return -EIO if already wedged */ 4344 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4345 return -EIO; 4346 4347 spin_lock(&file_priv->mm.lock); 4348 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4349 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4350 break; 4351 4352 if (target) { 4353 list_del(&target->client_link); 4354 target->file_priv = NULL; 4355 } 4356 4357 target = request; 4358 } 4359 if (target) 4360 i915_request_get(target); 4361 spin_unlock(&file_priv->mm.lock); 4362 4363 if (target == NULL) 4364 return 0; 4365 4366 ret = i915_request_wait(target, 4367 I915_WAIT_INTERRUPTIBLE, 4368 MAX_SCHEDULE_TIMEOUT); 4369 i915_request_put(target); 4370 4371 return ret < 0 ? ret : 0; 4372 } 4373 4374 struct i915_vma * 4375 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4376 const struct i915_ggtt_view *view, 4377 u64 size, 4378 u64 alignment, 4379 u64 flags) 4380 { 4381 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4382 struct i915_address_space *vm = &dev_priv->ggtt.base; 4383 struct i915_vma *vma; 4384 int ret; 4385 4386 lockdep_assert_held(&obj->base.dev->struct_mutex); 4387 4388 if (flags & PIN_MAPPABLE && 4389 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4390 /* If the required space is larger than the available 4391 * aperture, we will not able to find a slot for the 4392 * object and unbinding the object now will be in 4393 * vain. Worse, doing so may cause us to ping-pong 4394 * the object in and out of the Global GTT and 4395 * waste a lot of cycles under the mutex. 4396 */ 4397 if (obj->base.size > dev_priv->ggtt.mappable_end) 4398 return ERR_PTR(-E2BIG); 4399 4400 /* If NONBLOCK is set the caller is optimistically 4401 * trying to cache the full object within the mappable 4402 * aperture, and *must* have a fallback in place for 4403 * situations where we cannot bind the object. 
We 4404 * can be a little more lax here and use the fallback 4405 * more often to avoid costly migrations of ourselves 4406 * and other objects within the aperture. 4407 * 4408 * Half-the-aperture is used as a simple heuristic. 4409 * More interesting would be to do a search for a free 4410 * block prior to making the commitment to unbind. 4411 * That caters for the self-harm case, and with a 4412 * little more heuristics (e.g. NOFAULT, NOEVICT) 4413 * we could try to minimise harm to others. 4414 */ 4415 if (flags & PIN_NONBLOCK && 4416 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4417 return ERR_PTR(-ENOSPC); 4418 } 4419 4420 vma = i915_vma_instance(obj, vm, view); 4421 if (unlikely(IS_ERR(vma))) 4422 return vma; 4423 4424 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4425 if (flags & PIN_NONBLOCK) { 4426 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4427 return ERR_PTR(-ENOSPC); 4428 4429 if (flags & PIN_MAPPABLE && 4430 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4431 return ERR_PTR(-ENOSPC); 4432 } 4433 4434 WARN(i915_vma_is_pinned(vma), 4435 "bo is already pinned in ggtt with incorrect alignment:" 4436 " offset=%08x, req.alignment=%llx," 4437 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4438 i915_ggtt_offset(vma), alignment, 4439 !!(flags & PIN_MAPPABLE), 4440 i915_vma_is_map_and_fenceable(vma)); 4441 ret = i915_vma_unbind(vma); 4442 if (ret) 4443 return ERR_PTR(ret); 4444 } 4445 4446 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4447 if (ret) 4448 return ERR_PTR(ret); 4449 4450 return vma; 4451 } 4452 4453 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4454 { 4455 /* Note that we could alias engines in the execbuf API, but 4456 * that would be very unwise as it prevents userspace from 4457 * fine control over engine selection. Ahem. 4458 * 4459 * This should be something like EXEC_MAX_ENGINE instead of 4460 * I915_NUM_ENGINES. 4461 */ 4462 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4463 return 0x10000 << id; 4464 } 4465 4466 static __always_inline unsigned int __busy_write_id(unsigned int id) 4467 { 4468 /* The uABI guarantees an active writer is also amongst the read 4469 * engines. This would be true if we accessed the activity tracking 4470 * under the lock, but as we perform the lookup of the object and 4471 * its activity locklessly we cannot guarantee that the last_write 4472 * being active implies that we have set the same engine flag from 4473 * last_read - hence we always set both read and write busy for 4474 * last_write. 4475 */ 4476 return id | __busy_read_flag(id); 4477 } 4478 4479 static __always_inline unsigned int 4480 __busy_set_if_active(const struct dma_fence *fence, 4481 unsigned int (*flag)(unsigned int id)) 4482 { 4483 struct i915_request *rq; 4484 4485 /* We have to check the current hw status of the fence as the uABI 4486 * guarantees forward progress. We could rely on the idle worker 4487 * to eventually flush us, but to minimise latency just ask the 4488 * hardware. 4489 * 4490 * Note we only report on the status of native fences.
4491 */ 4492 if (!dma_fence_is_i915(fence)) 4493 return 0; 4494 4495 /* opencode to_request() in order to avoid const warnings */ 4496 rq = container_of(fence, struct i915_request, fence); 4497 if (i915_request_completed(rq)) 4498 return 0; 4499 4500 return flag(rq->engine->uabi_id); 4501 } 4502 4503 static __always_inline unsigned int 4504 busy_check_reader(const struct dma_fence *fence) 4505 { 4506 return __busy_set_if_active(fence, __busy_read_flag); 4507 } 4508 4509 static __always_inline unsigned int 4510 busy_check_writer(const struct dma_fence *fence) 4511 { 4512 if (!fence) 4513 return 0; 4514 4515 return __busy_set_if_active(fence, __busy_write_id); 4516 } 4517 4518 int 4519 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4520 struct drm_file *file) 4521 { 4522 struct drm_i915_gem_busy *args = data; 4523 struct drm_i915_gem_object *obj; 4524 struct reservation_object_list *list; 4525 unsigned int seq; 4526 int err; 4527 4528 err = -ENOENT; 4529 rcu_read_lock(); 4530 obj = i915_gem_object_lookup_rcu(file, args->handle); 4531 if (!obj) 4532 goto out; 4533 4534 /* A discrepancy here is that we do not report the status of 4535 * non-i915 fences, i.e. even though we may report the object as idle, 4536 * a call to set-domain may still stall waiting for foreign rendering. 4537 * This also means that wait-ioctl may report an object as busy, 4538 * where busy-ioctl considers it idle. 4539 * 4540 * We trade the ability to warn of foreign fences to report on which 4541 * i915 engines are active for the object. 4542 * 4543 * Alternatively, we can trade that extra information on read/write 4544 * activity with 4545 * args->busy = 4546 * !reservation_object_test_signaled_rcu(obj->resv, true); 4547 * to report the overall busyness. This is what the wait-ioctl does. 
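* For example, with the encoding from __busy_read_flag() and __busy_write_id() above, a write by the engine with uabi_id 2 is reported as 0x40002 (read flag 0x10000 << 2, plus the id in the low word), while pure reads on engines 0 and 2 are reported as 0x50000.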
4548 * 4549 */ 4550 retry: 4551 seq = raw_read_seqcount(&obj->resv->seq); 4552 4553 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4554 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4555 4556 /* Translate shared fences to READ set of engines */ 4557 list = rcu_dereference(obj->resv->fence); 4558 if (list) { 4559 unsigned int shared_count = list->shared_count, i; 4560 4561 for (i = 0; i < shared_count; ++i) { 4562 struct dma_fence *fence = 4563 rcu_dereference(list->shared[i]); 4564 4565 args->busy |= busy_check_reader(fence); 4566 } 4567 } 4568 4569 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4570 goto retry; 4571 4572 err = 0; 4573 out: 4574 rcu_read_unlock(); 4575 return err; 4576 } 4577 4578 int 4579 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4580 struct drm_file *file_priv) 4581 { 4582 return i915_gem_ring_throttle(dev, file_priv); 4583 } 4584 4585 int 4586 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4587 struct drm_file *file_priv) 4588 { 4589 struct drm_i915_private *dev_priv = to_i915(dev); 4590 struct drm_i915_gem_madvise *args = data; 4591 struct drm_i915_gem_object *obj; 4592 int err; 4593 4594 switch (args->madv) { 4595 case I915_MADV_DONTNEED: 4596 case I915_MADV_WILLNEED: 4597 break; 4598 default: 4599 return -EINVAL; 4600 } 4601 4602 obj = i915_gem_object_lookup(file_priv, args->handle); 4603 if (!obj) 4604 return -ENOENT; 4605 4606 err = mutex_lock_interruptible(&obj->mm.lock); 4607 if (err) 4608 goto out; 4609 4610 if (i915_gem_object_has_pages(obj) && 4611 i915_gem_object_is_tiled(obj) && 4612 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4613 if (obj->mm.madv == I915_MADV_WILLNEED) { 4614 GEM_BUG_ON(!obj->mm.quirked); 4615 __i915_gem_object_unpin_pages(obj); 4616 obj->mm.quirked = false; 4617 } 4618 if (args->madv == I915_MADV_WILLNEED) { 4619 GEM_BUG_ON(obj->mm.quirked); 4620 __i915_gem_object_pin_pages(obj); 4621 obj->mm.quirked = true; 4622 } 4623 } 4624 4625 if (obj->mm.madv != __I915_MADV_PURGED) 4626 obj->mm.madv = args->madv; 4627 4628 /* if the object is no longer attached, discard its backing storage */ 4629 if (obj->mm.madv == I915_MADV_DONTNEED && 4630 !i915_gem_object_has_pages(obj)) 4631 i915_gem_object_truncate(obj); 4632 4633 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4634 mutex_unlock(&obj->mm.lock); 4635 4636 out: 4637 i915_gem_object_put(obj); 4638 return err; 4639 } 4640 4641 static void 4642 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4643 { 4644 struct drm_i915_gem_object *obj = 4645 container_of(active, typeof(*obj), frontbuffer_write); 4646 4647 intel_fb_obj_flush(obj, ORIGIN_CS); 4648 } 4649 4650 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4651 const struct drm_i915_gem_object_ops *ops) 4652 { 4653 mutex_init(&obj->mm.lock); 4654 4655 INIT_LIST_HEAD(&obj->vma_list); 4656 INIT_LIST_HEAD(&obj->lut_list); 4657 INIT_LIST_HEAD(&obj->batch_pool_link); 4658 4659 obj->ops = ops; 4660 4661 reservation_object_init(&obj->__builtin_resv); 4662 obj->resv = &obj->__builtin_resv; 4663 4664 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4665 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4666 4667 obj->mm.madv = I915_MADV_WILLNEED; 4668 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4669 mutex_init(&obj->mm.get_page.lock); 4670 4671 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4672 } 4673 4674 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4675 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4676 I915_GEM_OBJECT_IS_SHRINKABLE, 4677 4678 .get_pages = i915_gem_object_get_pages_gtt, 4679 .put_pages = i915_gem_object_put_pages_gtt, 4680 4681 .pwrite = i915_gem_object_pwrite_gtt, 4682 }; 4683 4684 static int i915_gem_object_create_shmem(struct drm_device *dev, 4685 struct drm_gem_object *obj, 4686 size_t size) 4687 { 4688 struct drm_i915_private *i915 = to_i915(dev); 4689 unsigned long flags = VM_NORESERVE; 4690 struct file *filp; 4691 4692 drm_gem_private_object_init(dev, obj, size); 4693 4694 if (i915->mm.gemfs) 4695 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4696 flags); 4697 else 4698 filp = shmem_file_setup("i915", size, flags); 4699 4700 if (IS_ERR(filp)) 4701 return PTR_ERR(filp); 4702 4703 obj->filp = filp; 4704 4705 return 0; 4706 } 4707 4708 struct drm_i915_gem_object * 4709 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4710 { 4711 struct drm_i915_gem_object *obj; 4712 struct address_space *mapping; 4713 unsigned int cache_level; 4714 gfp_t mask; 4715 int ret; 4716 4717 /* There is a prevalence of the assumption that we fit the object's 4718 * page count inside a 32bit _signed_ variable. Let's document this and 4719 * catch if we ever need to fix it. In the meantime, if you do spot 4720 * such a local variable, please consider fixing! 4721 */ 4722 if (size >> PAGE_SHIFT > INT_MAX) 4723 return ERR_PTR(-E2BIG); 4724 4725 if (overflows_type(size, obj->base.size)) 4726 return ERR_PTR(-E2BIG); 4727 4728 obj = i915_gem_object_alloc(dev_priv); 4729 if (obj == NULL) 4730 return ERR_PTR(-ENOMEM); 4731 4732 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4733 if (ret) 4734 goto fail; 4735 4736 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4737 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4738 /* 965gm cannot relocate objects above 4GiB. */ 4739 mask &= ~__GFP_HIGHMEM; 4740 mask |= __GFP_DMA32; 4741 } 4742 4743 mapping = obj->base.filp->f_mapping; 4744 mapping_set_gfp_mask(mapping, mask); 4745 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4746 4747 i915_gem_object_init(obj, &i915_gem_object_ops); 4748 4749 obj->write_domain = I915_GEM_DOMAIN_CPU; 4750 obj->read_domains = I915_GEM_DOMAIN_CPU; 4751 4752 if (HAS_LLC(dev_priv)) 4753 /* On some devices, we can have the GPU use the LLC (the CPU 4754 * cache) for about a 10% performance improvement 4755 * compared to uncached. Graphics requests other than 4756 * display scanout are coherent with the CPU in 4757 * accessing this cache. This means in this mode we 4758 * don't need to clflush on the CPU side, and on the 4759 * GPU side we only need to flush internal caches to 4760 * get data visible to the CPU. 4761 * 4762 * However, we maintain the display planes as UC, and so 4763 * need to rebind when first used as such. 4764 */ 4765 cache_level = I915_CACHE_LLC; 4766 else 4767 cache_level = I915_CACHE_NONE; 4768 4769 i915_gem_object_set_cache_coherency(obj, cache_level); 4770 4771 trace_i915_gem_object_create(obj); 4772 4773 return obj; 4774 4775 fail: 4776 i915_gem_object_free(obj); 4777 return ERR_PTR(ret); 4778 } 4779 4780 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4781 { 4782 /* If we are the last user of the backing storage (be it shmemfs 4783 * pages or stolen etc), we know that the pages are going to be 4784 * immediately released. In this case, we can then skip copying 4785 * back the contents from the GPU. 
4786 */ 4787 4788 if (obj->mm.madv != I915_MADV_WILLNEED) 4789 return false; 4790 4791 if (obj->base.filp == NULL) 4792 return true; 4793 4794 /* At first glance, this looks racy, but then again so would be 4795 * userspace racing mmap against close. However, the first external 4796 * reference to the filp can only be obtained through the 4797 * i915_gem_mmap_ioctl() which safeguards us against the user 4798 * acquiring such a reference whilst we are in the middle of 4799 * freeing the object. 4800 */ 4801 return atomic_long_read(&obj->base.filp->f_count) == 1; 4802 } 4803 4804 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4805 struct llist_node *freed) 4806 { 4807 struct drm_i915_gem_object *obj, *on; 4808 4809 intel_runtime_pm_get(i915); 4810 llist_for_each_entry_safe(obj, on, freed, freed) { 4811 struct i915_vma *vma, *vn; 4812 4813 trace_i915_gem_object_destroy(obj); 4814 4815 mutex_lock(&i915->drm.struct_mutex); 4816 4817 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4818 list_for_each_entry_safe(vma, vn, 4819 &obj->vma_list, obj_link) { 4820 GEM_BUG_ON(i915_vma_is_active(vma)); 4821 vma->flags &= ~I915_VMA_PIN_MASK; 4822 i915_vma_destroy(vma); 4823 } 4824 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4825 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4826 4827 /* This serializes freeing with the shrinker. Since the free 4828 * is delayed, first by RCU then by the workqueue, we want the 4829 * shrinker to be able to free pages of unreferenced objects, 4830 * or else we may oom whilst there are plenty of deferred 4831 * freed objects. 4832 */ 4833 if (i915_gem_object_has_pages(obj)) { 4834 spin_lock(&i915->mm.obj_lock); 4835 list_del_init(&obj->mm.link); 4836 spin_unlock(&i915->mm.obj_lock); 4837 } 4838 4839 mutex_unlock(&i915->drm.struct_mutex); 4840 4841 GEM_BUG_ON(obj->bind_count); 4842 GEM_BUG_ON(obj->userfault_count); 4843 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4844 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4845 4846 if (obj->ops->release) 4847 obj->ops->release(obj); 4848 4849 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4850 atomic_set(&obj->mm.pages_pin_count, 0); 4851 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4852 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4853 4854 if (obj->base.import_attach) 4855 drm_prime_gem_destroy(&obj->base, NULL); 4856 4857 reservation_object_fini(&obj->__builtin_resv); 4858 drm_gem_object_release(&obj->base); 4859 i915_gem_info_remove_obj(i915, obj->base.size); 4860 4861 kfree(obj->bit_17); 4862 i915_gem_object_free(obj); 4863 4864 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4865 atomic_dec(&i915->mm.free_count); 4866 4867 if (on) 4868 cond_resched(); 4869 } 4870 intel_runtime_pm_put(i915); 4871 } 4872 4873 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4874 { 4875 struct llist_node *freed; 4876 4877 /* Free the oldest, most stale object to keep the free_list short */ 4878 freed = NULL; 4879 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4880 /* Only one consumer of llist_del_first() allowed */ 4881 spin_lock(&i915->mm.free_lock); 4882 freed = llist_del_first(&i915->mm.free_list); 4883 spin_unlock(&i915->mm.free_lock); 4884 } 4885 if (unlikely(freed)) { 4886 freed->next = NULL; 4887 __i915_gem_free_objects(i915, freed); 4888 } 4889 } 4890 4891 static void __i915_gem_free_work(struct work_struct *work) 4892 { 4893 struct drm_i915_private *i915 = 4894 container_of(work, struct drm_i915_private, mm.free_work); 4895 struct llist_node *freed; 4896 4897 /* 4898 * All 
file-owned VMA should have been released by this point through 4899 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4900 * However, the object may also be bound into the global GTT (e.g. 4901 * older GPUs without per-process support, or for direct access through 4902 * the GTT either for the user or for scanout). Those VMA still need to 4903 * unbound now. 4904 */ 4905 4906 spin_lock(&i915->mm.free_lock); 4907 while ((freed = llist_del_all(&i915->mm.free_list))) { 4908 spin_unlock(&i915->mm.free_lock); 4909 4910 __i915_gem_free_objects(i915, freed); 4911 if (need_resched()) 4912 return; 4913 4914 spin_lock(&i915->mm.free_lock); 4915 } 4916 spin_unlock(&i915->mm.free_lock); 4917 } 4918 4919 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4920 { 4921 struct drm_i915_gem_object *obj = 4922 container_of(head, typeof(*obj), rcu); 4923 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4924 4925 /* 4926 * Since we require blocking on struct_mutex to unbind the freed 4927 * object from the GPU before releasing resources back to the 4928 * system, we can not do that directly from the RCU callback (which may 4929 * be a softirq context), but must instead then defer that work onto a 4930 * kthread. We use the RCU callback rather than move the freed object 4931 * directly onto the work queue so that we can mix between using the 4932 * worker and performing frees directly from subsequent allocations for 4933 * crude but effective memory throttling. 4934 */ 4935 if (llist_add(&obj->freed, &i915->mm.free_list)) 4936 queue_work(i915->wq, &i915->mm.free_work); 4937 } 4938 4939 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4940 { 4941 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4942 4943 if (obj->mm.quirked) 4944 __i915_gem_object_unpin_pages(obj); 4945 4946 if (discard_backing_storage(obj)) 4947 obj->mm.madv = I915_MADV_DONTNEED; 4948 4949 /* 4950 * Before we free the object, make sure any pure RCU-only 4951 * read-side critical sections are complete, e.g. 4952 * i915_gem_busy_ioctl(). For the corresponding synchronized 4953 * lookup see i915_gem_object_lookup_rcu(). 4954 */ 4955 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4956 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4957 } 4958 4959 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4960 { 4961 lockdep_assert_held(&obj->base.dev->struct_mutex); 4962 4963 if (!i915_gem_object_has_active_reference(obj) && 4964 i915_gem_object_is_active(obj)) 4965 i915_gem_object_set_active_reference(obj); 4966 else 4967 i915_gem_object_put(obj); 4968 } 4969 4970 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 4971 { 4972 struct i915_gem_context *kernel_context = i915->kernel_context; 4973 struct intel_engine_cs *engine; 4974 enum intel_engine_id id; 4975 4976 for_each_engine(engine, i915, id) { 4977 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 4978 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4979 } 4980 } 4981 4982 void i915_gem_sanitize(struct drm_i915_private *i915) 4983 { 4984 if (i915_terminally_wedged(&i915->gpu_error)) { 4985 mutex_lock(&i915->drm.struct_mutex); 4986 i915_gem_unset_wedged(i915); 4987 mutex_unlock(&i915->drm.struct_mutex); 4988 } 4989 4990 /* 4991 * If we inherit context state from the BIOS or earlier occupants 4992 * of the GPU, the GPU may be in an inconsistent state when we 4993 * try to take over. The only way to remove the earlier state 4994 * is by resetting. 
However, resetting on earlier gen is tricky as 4995 * it may impact the display and we are uncertain about the stability 4996 * of the reset; otherwise this could be applied to even earlier generations as well. 4997 */ 4998 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 4999 WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5000 } 5001 5002 int i915_gem_suspend(struct drm_i915_private *dev_priv) 5003 { 5004 struct drm_device *dev = &dev_priv->drm; 5005 int ret; 5006 5007 intel_runtime_pm_get(dev_priv); 5008 intel_suspend_gt_powersave(dev_priv); 5009 5010 mutex_lock(&dev->struct_mutex); 5011 5012 /* We have to flush all the executing contexts to main memory so 5013 * that they can be saved in the hibernation image. To ensure the last 5014 * context image is coherent, we have to switch away from it. That 5015 * leaves the dev_priv->kernel_context still active when 5016 * we actually suspend, and its image in memory may not match the GPU 5017 * state. Fortunately, the kernel_context is disposable and we do 5018 * not rely on its state. 5019 */ 5020 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5021 ret = i915_gem_switch_to_kernel_context(dev_priv); 5022 if (ret) 5023 goto err_unlock; 5024 5025 ret = i915_gem_wait_for_idle(dev_priv, 5026 I915_WAIT_INTERRUPTIBLE | 5027 I915_WAIT_LOCKED); 5028 if (ret && ret != -EIO) 5029 goto err_unlock; 5030 5031 assert_kernel_context_is_current(dev_priv); 5032 } 5033 i915_gem_contexts_lost(dev_priv); 5034 mutex_unlock(&dev->struct_mutex); 5035 5036 intel_uc_suspend(dev_priv); 5037 5038 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 5039 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 5040 5041 /* As the idle_work is rearming if it detects a race, play safe and 5042 * repeat the flush until it is definitely idle. 5043 */ 5044 drain_delayed_work(&dev_priv->gt.idle_work); 5045 5046 /* Assert that we successfully flushed all the work and 5047 * reset the GPU back to its idle, low power state. 5048 */ 5049 WARN_ON(dev_priv->gt.awake); 5050 if (WARN_ON(!intel_engines_are_idle(dev_priv))) 5051 i915_gem_set_wedged(dev_priv); /* no hope, discard everything */ 5052 5053 /* 5054 * Neither the BIOS, ourselves nor any other kernel 5055 * expects the system to be in execlists mode on startup, 5056 * so we need to reset the GPU back to legacy mode. And the only 5057 * known way to disable logical contexts is through a GPU reset. 5058 * 5059 * So in order to leave the system in a known default configuration, 5060 * always reset the GPU upon unload and suspend. Afterwards we then 5061 * clean up the GEM state tracking, flushing off the requests and 5062 * leaving the system in a known idle state. 5063 * 5064 * Note that it is of the utmost importance that the GPU is idle and 5065 * all stray writes are flushed *before* we dismantle the backing 5066 * storage for the pinned objects. 5067 * 5068 * However, since we are uncertain that resetting the GPU on older 5069 * machines is a good idea, we don't - just in case it leaves the 5070 * machine in an unusable condition.
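* (i915_gem_sanitize(), called below, therefore only performs the reset on gen5+ where intel_has_gpu_reset() indicates it is available.)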
5071 */ 5072 intel_uc_sanitize(dev_priv); 5073 i915_gem_sanitize(dev_priv); 5074 5075 intel_runtime_pm_put(dev_priv); 5076 return 0; 5077 5078 err_unlock: 5079 mutex_unlock(&dev->struct_mutex); 5080 intel_runtime_pm_put(dev_priv); 5081 return ret; 5082 } 5083 5084 void i915_gem_resume(struct drm_i915_private *i915) 5085 { 5086 WARN_ON(i915->gt.awake); 5087 5088 mutex_lock(&i915->drm.struct_mutex); 5089 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5090 5091 i915_gem_restore_gtt_mappings(i915); 5092 i915_gem_restore_fences(i915); 5093 5094 /* 5095 * As we didn't flush the kernel context before suspend, we cannot 5096 * guarantee that the context image is complete. So let's just reset 5097 * it and start again. 5098 */ 5099 i915->gt.resume(i915); 5100 5101 if (i915_gem_init_hw(i915)) 5102 goto err_wedged; 5103 5104 intel_uc_resume(i915); 5105 5106 /* Always reload a context for powersaving. */ 5107 if (i915_gem_switch_to_kernel_context(i915)) 5108 goto err_wedged; 5109 5110 out_unlock: 5111 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5112 mutex_unlock(&i915->drm.struct_mutex); 5113 return; 5114 5115 err_wedged: 5116 if (!i915_terminally_wedged(&i915->gpu_error)) { 5117 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5118 i915_gem_set_wedged(i915); 5119 } 5120 goto out_unlock; 5121 } 5122 5123 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5124 { 5125 if (INTEL_GEN(dev_priv) < 5 || 5126 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5127 return; 5128 5129 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5130 DISP_TILE_SURFACE_SWIZZLING); 5131 5132 if (IS_GEN5(dev_priv)) 5133 return; 5134 5135 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5136 if (IS_GEN6(dev_priv)) 5137 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5138 else if (IS_GEN7(dev_priv)) 5139 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5140 else if (IS_GEN8(dev_priv)) 5141 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5142 else 5143 BUG(); 5144 } 5145 5146 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5147 { 5148 I915_WRITE(RING_CTL(base), 0); 5149 I915_WRITE(RING_HEAD(base), 0); 5150 I915_WRITE(RING_TAIL(base), 0); 5151 I915_WRITE(RING_START(base), 0); 5152 } 5153 5154 static void init_unused_rings(struct drm_i915_private *dev_priv) 5155 { 5156 if (IS_I830(dev_priv)) { 5157 init_unused_ring(dev_priv, PRB1_BASE); 5158 init_unused_ring(dev_priv, SRB0_BASE); 5159 init_unused_ring(dev_priv, SRB1_BASE); 5160 init_unused_ring(dev_priv, SRB2_BASE); 5161 init_unused_ring(dev_priv, SRB3_BASE); 5162 } else if (IS_GEN2(dev_priv)) { 5163 init_unused_ring(dev_priv, SRB0_BASE); 5164 init_unused_ring(dev_priv, SRB1_BASE); 5165 } else if (IS_GEN3(dev_priv)) { 5166 init_unused_ring(dev_priv, PRB1_BASE); 5167 init_unused_ring(dev_priv, PRB2_BASE); 5168 } 5169 } 5170 5171 static int __i915_gem_restart_engines(void *data) 5172 { 5173 struct drm_i915_private *i915 = data; 5174 struct intel_engine_cs *engine; 5175 enum intel_engine_id id; 5176 int err; 5177 5178 for_each_engine(engine, i915, id) { 5179 err = engine->init_hw(engine); 5180 if (err) { 5181 DRM_ERROR("Failed to restart %s (%d)\n", 5182 engine->name, err); 5183 return err; 5184 } 5185 } 5186 5187 return 0; 5188 } 5189 5190 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5191 { 5192 int ret; 5193 5194 dev_priv->gt.last_init_time = ktime_get(); 5195 5196 /* Double layer security blanket, see i915_gem_init() */ 5197 
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5198 5199 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5200 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5201 5202 if (IS_HASWELL(dev_priv)) 5203 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5204 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5205 5206 if (HAS_PCH_NOP(dev_priv)) { 5207 if (IS_IVYBRIDGE(dev_priv)) { 5208 u32 temp = I915_READ(GEN7_MSG_CTL); 5209 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5210 I915_WRITE(GEN7_MSG_CTL, temp); 5211 } else if (INTEL_GEN(dev_priv) >= 7) { 5212 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5213 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5214 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5215 } 5216 } 5217 5218 intel_gt_workarounds_apply(dev_priv); 5219 5220 i915_gem_init_swizzling(dev_priv); 5221 5222 /* 5223 * At least 830 can leave some of the unused rings 5224 * "active" (ie. head != tail) after resume which 5225 * will prevent c3 entry. Makes sure all unused rings 5226 * are totally idle. 5227 */ 5228 init_unused_rings(dev_priv); 5229 5230 BUG_ON(!dev_priv->kernel_context); 5231 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5232 ret = -EIO; 5233 goto out; 5234 } 5235 5236 ret = i915_ppgtt_init_hw(dev_priv); 5237 if (ret) { 5238 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5239 goto out; 5240 } 5241 5242 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5243 if (ret) { 5244 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5245 goto out; 5246 } 5247 5248 /* We can't enable contexts until all firmware is loaded */ 5249 ret = intel_uc_init_hw(dev_priv); 5250 if (ret) { 5251 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5252 goto out; 5253 } 5254 5255 intel_mocs_init_l3cc_table(dev_priv); 5256 5257 /* Only when the HW is re-initialised, can we replay the requests */ 5258 ret = __i915_gem_restart_engines(dev_priv); 5259 out: 5260 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5261 return ret; 5262 } 5263 5264 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5265 { 5266 struct i915_gem_context *ctx; 5267 struct intel_engine_cs *engine; 5268 enum intel_engine_id id; 5269 int err; 5270 5271 /* 5272 * As we reset the gpu during very early sanitisation, the current 5273 * register state on the GPU should reflect its defaults values. 5274 * We load a context onto the hw (with restore-inhibit), then switch 5275 * over to a second context to save that default register state. We 5276 * can then prime every new context with that state so they all start 5277 * from the same default HW values. 
5278 */ 5279 5280 ctx = i915_gem_context_create_kernel(i915, 0); 5281 if (IS_ERR(ctx)) 5282 return PTR_ERR(ctx); 5283 5284 for_each_engine(engine, i915, id) { 5285 struct i915_request *rq; 5286 5287 rq = i915_request_alloc(engine, ctx); 5288 if (IS_ERR(rq)) { 5289 err = PTR_ERR(rq); 5290 goto out_ctx; 5291 } 5292 5293 err = 0; 5294 if (engine->init_context) 5295 err = engine->init_context(rq); 5296 5297 __i915_request_add(rq, true); 5298 if (err) 5299 goto err_active; 5300 } 5301 5302 err = i915_gem_switch_to_kernel_context(i915); 5303 if (err) 5304 goto err_active; 5305 5306 err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); 5307 if (err) 5308 goto err_active; 5309 5310 assert_kernel_context_is_current(i915); 5311 5312 for_each_engine(engine, i915, id) { 5313 struct i915_vma *state; 5314 5315 state = to_intel_context(ctx, engine)->state; 5316 if (!state) 5317 continue; 5318 5319 /* 5320 * As we will hold a reference to the logical state, it will 5321 * not be torn down with the context, and importantly the 5322 * object will hold onto its vma (making it possible for a 5323 * stray GTT write to corrupt our defaults). Unmap the vma 5324 * from the GTT to prevent such accidents and reclaim the 5325 * space. 5326 */ 5327 err = i915_vma_unbind(state); 5328 if (err) 5329 goto err_active; 5330 5331 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5332 if (err) 5333 goto err_active; 5334 5335 engine->default_state = i915_gem_object_get(state->obj); 5336 } 5337 5338 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5339 unsigned int found = intel_engines_has_context_isolation(i915); 5340 5341 /* 5342 * Make sure that classes with multiple engine instances all 5343 * share the same basic configuration. 5344 */ 5345 for_each_engine(engine, i915, id) { 5346 unsigned int bit = BIT(engine->uabi_class); 5347 unsigned int expected = engine->default_state ? bit : 0; 5348 5349 if ((found & bit) != expected) { 5350 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5351 engine->uabi_class, engine->name); 5352 } 5353 } 5354 } 5355 5356 out_ctx: 5357 i915_gem_context_set_closed(ctx); 5358 i915_gem_context_put(ctx); 5359 return err; 5360 5361 err_active: 5362 /* 5363 * If we have to abandon now, we expect the engines to be idle 5364 * and ready to be torn-down. First try to flush any remaining 5365 * request, ensure we are pointing at the kernel context and 5366 * then remove it. 5367 */ 5368 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5369 goto out_ctx; 5370 5371 if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) 5372 goto out_ctx; 5373 5374 i915_gem_contexts_lost(i915); 5375 goto out_ctx; 5376 } 5377 5378 int i915_gem_init(struct drm_i915_private *dev_priv) 5379 { 5380 int ret; 5381 5382 /* 5383 * We need to fallback to 4K pages since gvt gtt handling doesn't 5384 * support huge page entries - we will need to check either hypervisor 5385 * mm can support huge guest page or just do emulation in gvt. 
5386 */ 5387 if (intel_vgpu_active(dev_priv)) 5388 mkwrite_device_info(dev_priv)->page_sizes = 5389 I915_GTT_PAGE_SIZE_4K; 5390 5391 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5392 5393 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5394 dev_priv->gt.resume = intel_lr_context_resume; 5395 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5396 } else { 5397 dev_priv->gt.resume = intel_legacy_submission_resume; 5398 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5399 } 5400 5401 ret = i915_gem_init_userptr(dev_priv); 5402 if (ret) 5403 return ret; 5404 5405 ret = intel_wopcm_init(&dev_priv->wopcm); 5406 if (ret) 5407 return ret; 5408 5409 ret = intel_uc_init_misc(dev_priv); 5410 if (ret) 5411 return ret; 5412 5413 /* This is just a security blanket to placate dragons. 5414 * On some systems, we very sporadically observe that the first TLBs 5415 * used by the CS may be stale, despite us poking the TLB reset. If 5416 * we hold the forcewake during initialisation these problems 5417 * just magically go away. 5418 */ 5419 mutex_lock(&dev_priv->drm.struct_mutex); 5420 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5421 5422 ret = i915_gem_init_ggtt(dev_priv); 5423 if (ret) { 5424 GEM_BUG_ON(ret == -EIO); 5425 goto err_unlock; 5426 } 5427 5428 ret = i915_gem_contexts_init(dev_priv); 5429 if (ret) { 5430 GEM_BUG_ON(ret == -EIO); 5431 goto err_ggtt; 5432 } 5433 5434 ret = intel_engines_init(dev_priv); 5435 if (ret) { 5436 GEM_BUG_ON(ret == -EIO); 5437 goto err_context; 5438 } 5439 5440 intel_init_gt_powersave(dev_priv); 5441 5442 ret = intel_uc_init(dev_priv); 5443 if (ret) 5444 goto err_pm; 5445 5446 ret = i915_gem_init_hw(dev_priv); 5447 if (ret) 5448 goto err_uc_init; 5449 5450 /* 5451 * Despite its name intel_init_clock_gating applies both display 5452 * clock gating workarounds; GT mmio workarounds and the occasional 5453 * GT power context workaround. Worse, sometimes it includes a context 5454 * register workaround which we need to apply before we record the 5455 * default HW state for all contexts. 5456 * 5457 * FIXME: break up the workarounds and apply them at the right time! 5458 */ 5459 intel_init_clock_gating(dev_priv); 5460 5461 ret = __intel_engines_record_defaults(dev_priv); 5462 if (ret) 5463 goto err_init_hw; 5464 5465 if (i915_inject_load_failure()) { 5466 ret = -ENODEV; 5467 goto err_init_hw; 5468 } 5469 5470 if (i915_inject_load_failure()) { 5471 ret = -EIO; 5472 goto err_init_hw; 5473 } 5474 5475 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5476 mutex_unlock(&dev_priv->drm.struct_mutex); 5477 5478 return 0; 5479 5480 /* 5481 * Unwinding is complicated by that we want to handle -EIO to mean 5482 * disable GPU submission but keep KMS alive. We want to mark the 5483 * HW as irrevisibly wedged, but keep enough state around that the 5484 * driver doesn't explode during runtime. 
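* (See the ret == -EIO handling at the end of the unwind below, where we mark the GPU wedged and return 0 so the driver keeps running without GPU submission.)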
5485 */ 5486 err_init_hw: 5487 i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); 5488 i915_gem_contexts_lost(dev_priv); 5489 intel_uc_fini_hw(dev_priv); 5490 err_uc_init: 5491 intel_uc_fini(dev_priv); 5492 err_pm: 5493 if (ret != -EIO) { 5494 intel_cleanup_gt_powersave(dev_priv); 5495 i915_gem_cleanup_engines(dev_priv); 5496 } 5497 err_context: 5498 if (ret != -EIO) 5499 i915_gem_contexts_fini(dev_priv); 5500 err_ggtt: 5501 err_unlock: 5502 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5503 mutex_unlock(&dev_priv->drm.struct_mutex); 5504 5505 intel_uc_fini_misc(dev_priv); 5506 5507 if (ret != -EIO) 5508 i915_gem_cleanup_userptr(dev_priv); 5509 5510 if (ret == -EIO) { 5511 /* 5512 * Allow engine initialisation to fail by marking the GPU as 5513 * wedged. But we only want to do this where the GPU is angry, 5514 * for all other failure, such as an allocation failure, bail. 5515 */ 5516 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5517 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5518 i915_gem_set_wedged(dev_priv); 5519 } 5520 ret = 0; 5521 } 5522 5523 i915_gem_drain_freed_objects(dev_priv); 5524 return ret; 5525 } 5526 5527 void i915_gem_init_mmio(struct drm_i915_private *i915) 5528 { 5529 i915_gem_sanitize(i915); 5530 } 5531 5532 void 5533 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5534 { 5535 struct intel_engine_cs *engine; 5536 enum intel_engine_id id; 5537 5538 for_each_engine(engine, dev_priv, id) 5539 dev_priv->gt.cleanup_engine(engine); 5540 } 5541 5542 void 5543 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5544 { 5545 int i; 5546 5547 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5548 !IS_CHERRYVIEW(dev_priv)) 5549 dev_priv->num_fence_regs = 32; 5550 else if (INTEL_GEN(dev_priv) >= 4 || 5551 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5552 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5553 dev_priv->num_fence_regs = 16; 5554 else 5555 dev_priv->num_fence_regs = 8; 5556 5557 if (intel_vgpu_active(dev_priv)) 5558 dev_priv->num_fence_regs = 5559 I915_READ(vgtif_reg(avail_rs.fence_num)); 5560 5561 /* Initialize fence registers to zero */ 5562 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5563 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5564 5565 fence->i915 = dev_priv; 5566 fence->id = i; 5567 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5568 } 5569 i915_gem_restore_fences(dev_priv); 5570 5571 i915_gem_detect_bit_6_swizzle(dev_priv); 5572 } 5573 5574 static void i915_gem_init__mm(struct drm_i915_private *i915) 5575 { 5576 spin_lock_init(&i915->mm.object_stat_lock); 5577 spin_lock_init(&i915->mm.obj_lock); 5578 spin_lock_init(&i915->mm.free_lock); 5579 5580 init_llist_head(&i915->mm.free_list); 5581 5582 INIT_LIST_HEAD(&i915->mm.unbound_list); 5583 INIT_LIST_HEAD(&i915->mm.bound_list); 5584 INIT_LIST_HEAD(&i915->mm.fence_list); 5585 INIT_LIST_HEAD(&i915->mm.userfault_list); 5586 5587 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5588 } 5589 5590 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5591 { 5592 int err = -ENOMEM; 5593 5594 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5595 if (!dev_priv->objects) 5596 goto err_out; 5597 5598 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5599 if (!dev_priv->vmas) 5600 goto err_objects; 5601 5602 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5603 if (!dev_priv->luts) 5604 goto err_vmas; 5605 5606 dev_priv->requests = KMEM_CACHE(i915_request, 5607 SLAB_HWCACHE_ALIGN | 5608 
SLAB_RECLAIM_ACCOUNT | 5609 SLAB_TYPESAFE_BY_RCU); 5610 if (!dev_priv->requests) 5611 goto err_luts; 5612 5613 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5614 SLAB_HWCACHE_ALIGN | 5615 SLAB_RECLAIM_ACCOUNT); 5616 if (!dev_priv->dependencies) 5617 goto err_requests; 5618 5619 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5620 if (!dev_priv->priorities) 5621 goto err_dependencies; 5622 5623 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5624 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5625 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5626 5627 i915_gem_init__mm(dev_priv); 5628 5629 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5630 i915_gem_retire_work_handler); 5631 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5632 i915_gem_idle_work_handler); 5633 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5634 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5635 5636 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5637 5638 spin_lock_init(&dev_priv->fb_tracking.lock); 5639 5640 err = i915_gemfs_init(dev_priv); 5641 if (err) 5642 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5643 5644 return 0; 5645 5646 err_dependencies: 5647 kmem_cache_destroy(dev_priv->dependencies); 5648 err_requests: 5649 kmem_cache_destroy(dev_priv->requests); 5650 err_luts: 5651 kmem_cache_destroy(dev_priv->luts); 5652 err_vmas: 5653 kmem_cache_destroy(dev_priv->vmas); 5654 err_objects: 5655 kmem_cache_destroy(dev_priv->objects); 5656 err_out: 5657 return err; 5658 } 5659 5660 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 5661 { 5662 i915_gem_drain_freed_objects(dev_priv); 5663 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5664 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5665 WARN_ON(dev_priv->mm.object_count); 5666 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5667 5668 kmem_cache_destroy(dev_priv->priorities); 5669 kmem_cache_destroy(dev_priv->dependencies); 5670 kmem_cache_destroy(dev_priv->requests); 5671 kmem_cache_destroy(dev_priv->luts); 5672 kmem_cache_destroy(dev_priv->vmas); 5673 kmem_cache_destroy(dev_priv->objects); 5674 5675 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5676 rcu_barrier(); 5677 5678 i915_gemfs_fini(dev_priv); 5679 } 5680 5681 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5682 { 5683 /* Discard all purgeable objects, let userspace recover those as 5684 * required after resuming. 5685 */ 5686 i915_gem_shrink_all(dev_priv); 5687 5688 return 0; 5689 } 5690 5691 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5692 { 5693 struct drm_i915_gem_object *obj; 5694 struct list_head *phases[] = { 5695 &dev_priv->mm.unbound_list, 5696 &dev_priv->mm.bound_list, 5697 NULL 5698 }, **p; 5699 5700 /* Called just before we write the hibernation image. 5701 * 5702 * We need to update the domain tracking to reflect that the CPU 5703 * will be accessing all the pages to create and restore from the 5704 * hibernation, and so upon restoration those pages will be in the 5705 * CPU domain. 5706 * 5707 * To make sure the hibernation image contains the latest state, 5708 * we update that state just before writing out the image. 

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
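
/*
 * Typical (illustrative) use from a plane update path, assuming the caller
 * knows the frontbuffer bit(s) owned by its plane: when the scanout buffer
 * changes, the tracking bits migrate from the old object to the new one,
 *
 *	i915_gem_track_fb(old_obj, new_obj, plane->frontbuffer_bit);
 *
 * and passing NULL for either object covers the plane being enabled or
 * disabled (no old, or no new, buffer).
 */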

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
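
/*
 * Illustrative sketch of a caller, e.g. copying a firmware blob into a
 * fresh GEM object ("blob" and "blob_size" are placeholders):
 *
 *	obj = i915_gem_object_create_from_data(dev_priv, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The object starts out in the CPU write domain and is rounded up to a
 * whole number of pages; only the first @size bytes are written.
 */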

struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new lookup is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
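
/*
 * Illustrative sketch: walking every page of an object with pinned pages
 * using the helpers above. Sequential access like this is exactly the case
 * that the cached sg iterator in i915_gem_object_get_sg() amortises to O(1):
 *
 *	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
 *		struct page *page = i915_gem_object_get_page(obj, i);
 *
 *		...
 *	}
 */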

int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = fetch_and_zero(&obj->mm.pages);
	if (pages) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		__i915_gem_object_reset_page_iter(obj);

		spin_lock(&i915->mm.obj_lock);
		list_del(&obj->mm.link);
		spin_unlock(&i915->mm.obj_lock);
	}

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	obj->mm.pages = pages;
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif