/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

static u32 __i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);
	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));

	if (!i915->gt.awake)
		return I915_EPOCH_INVALID;

	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);

	/*
	 * Be paranoid and flush a concurrent interrupt to make sure
	 * we don't reactivate any irq tasklets after parking.
	 *
	 * FIXME: Note that even though we have waited for execlists to be idle,
	 * there may still be an in-flight interrupt even though the CSB
	 * is now empty. synchronize_irq() makes sure that a residual interrupt
	 * is completed before we continue, but it doesn't prevent the HW from
	 * raising a spurious interrupt later. To complete the shield we should
	 * coordinate disabling the CS irq with flushing the interrupts.
	 */
	synchronize_irq(i915->drm.irq);

	intel_engines_park(i915);
	i915_timelines_park(i915);

	i915_pmu_gt_parked(i915);
	i915_vma_parked(i915);

	i915->gt.awake = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_idle(i915);

	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);

	intel_runtime_pm_put(i915);

	return i915->gt.epoch;
}

void i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);

	if (!i915->gt.awake)
		return;

	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
}

void i915_gem_unpark(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(!i915->gt.active_requests);

	if (i915->gt.awake)
		return;

	intel_runtime_pm_get_noresume(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has negative impact on the performance of the chip with
	 * huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->vm.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}

static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct i915_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_request_completed(rq))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client && !i915_request_started(rq)) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
	}

	timeout = i915_request_wait(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must all be signaled. If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0)
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);

	dma_fence_put(excl);

	/*
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
{
	struct i915_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;

	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(rq, attr);
	rcu_read_unlock();
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}

static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], attr);
	} else {
		__fence_set_priority(fence, attr);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      const struct i915_sched_attr *attr)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], attr);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, attr);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}
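
/*
 * Unlike i915_gem_object_wait_reservation(), which hands back the remaining
 * timeout, i915_gem_object_wait() folds that value away: callers see 0 once
 * the selected fences have signaled, or the negative error code propagated
 * from the underlying fence wait.
 */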

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       args->size, &args->handle);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	i915_gem_chipset_flush(dev_priv);

	intel_runtime_pm_get(dev_priv);
	spin_lock_irq(&dev_priv->uncore.lock);

	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

	spin_unlock_irq(&dev_priv->uncore.lock);
	intel_runtime_pm_put(dev_priv);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
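
/*
 * The two swizzled copy helpers above never cross a 64-byte cacheline in a
 * single chunk (this_length is clamped to the cacheline end), so XORing
 * gpu_offset with 64 flips address bit 6 and thereby swaps the two cachelines
 * of each 128-byte block. The callers apply this only to pages whose physical
 * address has bit 17 set (see the page_to_phys(page) & BIT(17) tests below).
 */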

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_is_active(vma))
			continue;

		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915 = to_i915(obj->base.dev);
	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}
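
/*
 * Worked example for compute_partial_view(): with 4 KiB pages,
 * MIN_CHUNK_PAGES below is 256 (1 MiB). A fault at page 1000 of an untiled
 * 8 MiB (2048 page) object therefore yields a partial view starting at page
 * rounddown(1000, 256) = 768 and covering min(256, 2048 - 768) = 256 pages.
 * Only when the chunk spans the whole object is a normal view used instead.
 */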
2007 */ 2008 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2009 { 2010 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2011 struct vm_area_struct *area = vmf->vma; 2012 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2013 struct drm_device *dev = obj->base.dev; 2014 struct drm_i915_private *dev_priv = to_i915(dev); 2015 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2016 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2017 struct i915_vma *vma; 2018 pgoff_t page_offset; 2019 int ret; 2020 2021 /* Sanity check that we allow writing into this object */ 2022 if (i915_gem_object_is_readonly(obj) && write) 2023 return VM_FAULT_SIGBUS; 2024 2025 /* We don't use vmf->pgoff since that has the fake offset */ 2026 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2027 2028 trace_i915_gem_object_fault(obj, page_offset, true, write); 2029 2030 /* Try to flush the object off the GPU first without holding the lock. 2031 * Upon acquiring the lock, we will perform our sanity checks and then 2032 * repeat the flush holding the lock in the normal manner to catch cases 2033 * where we are gazumped. 2034 */ 2035 ret = i915_gem_object_wait(obj, 2036 I915_WAIT_INTERRUPTIBLE, 2037 MAX_SCHEDULE_TIMEOUT, 2038 NULL); 2039 if (ret) 2040 goto err; 2041 2042 ret = i915_gem_object_pin_pages(obj); 2043 if (ret) 2044 goto err; 2045 2046 intel_runtime_pm_get(dev_priv); 2047 2048 ret = i915_mutex_lock_interruptible(dev); 2049 if (ret) 2050 goto err_rpm; 2051 2052 /* Access to snoopable pages through the GTT is incoherent. */ 2053 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2054 ret = -EFAULT; 2055 goto err_unlock; 2056 } 2057 2058 2059 /* Now pin it into the GTT as needed */ 2060 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2061 PIN_MAPPABLE | 2062 PIN_NONBLOCK | 2063 PIN_NONFAULT); 2064 if (IS_ERR(vma)) { 2065 /* Use a partial view if it is bigger than available space */ 2066 struct i915_ggtt_view view = 2067 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2068 unsigned int flags; 2069 2070 flags = PIN_MAPPABLE; 2071 if (view.type == I915_GGTT_VIEW_NORMAL) 2072 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2073 2074 /* 2075 * Userspace is now writing through an untracked VMA, abandon 2076 * all hope that the hardware is able to track future writes. 
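 *
 * (As a worked example of the partial-view fallback chosen above, assuming
 * 4K pages so that MIN_CHUNK_PAGES == SZ_1M >> PAGE_SHIFT == 256: an
 * untiled 16MiB object (4096 pages) faulting at page_offset 1000 gets
 * view.partial.offset == rounddown(1000, 256) == 768 and
 * view.partial.size == min(256, 4096 - 768) == 256 from
 * compute_partial_view(), i.e. only the 1MiB chunk around the fault is
 * bound rather than the whole object.)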
2077 */ 2078 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2079 2080 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2081 if (IS_ERR(vma) && !view.type) { 2082 flags = PIN_MAPPABLE; 2083 view.type = I915_GGTT_VIEW_PARTIAL; 2084 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2085 } 2086 } 2087 if (IS_ERR(vma)) { 2088 ret = PTR_ERR(vma); 2089 goto err_unlock; 2090 } 2091 2092 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2093 if (ret) 2094 goto err_unpin; 2095 2096 ret = i915_vma_pin_fence(vma); 2097 if (ret) 2098 goto err_unpin; 2099 2100 /* Finally, remap it using the new GTT offset */ 2101 ret = remap_io_mapping(area, 2102 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2103 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2104 min_t(u64, vma->size, area->vm_end - area->vm_start), 2105 &ggtt->iomap); 2106 if (ret) 2107 goto err_fence; 2108 2109 /* Mark as being mmapped into userspace for later revocation */ 2110 assert_rpm_wakelock_held(dev_priv); 2111 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2112 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2113 GEM_BUG_ON(!obj->userfault_count); 2114 2115 i915_vma_set_ggtt_write(vma); 2116 2117 err_fence: 2118 i915_vma_unpin_fence(vma); 2119 err_unpin: 2120 __i915_vma_unpin(vma); 2121 err_unlock: 2122 mutex_unlock(&dev->struct_mutex); 2123 err_rpm: 2124 intel_runtime_pm_put(dev_priv); 2125 i915_gem_object_unpin_pages(obj); 2126 err: 2127 switch (ret) { 2128 case -EIO: 2129 /* 2130 * We eat errors when the gpu is terminally wedged to avoid 2131 * userspace unduly crashing (gl has no provisions for mmaps to 2132 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2133 * and so needs to be reported. 2134 */ 2135 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2136 return VM_FAULT_SIGBUS; 2137 /* else: fall through */ 2138 case -EAGAIN: 2139 /* 2140 * EAGAIN means the gpu is hung and we'll wait for the error 2141 * handler to reset everything when re-faulting in 2142 * i915_mutex_lock_interruptible. 2143 */ 2144 case 0: 2145 case -ERESTARTSYS: 2146 case -EINTR: 2147 case -EBUSY: 2148 /* 2149 * EBUSY is ok: this just means that another thread 2150 * already did the job. 2151 */ 2152 return VM_FAULT_NOPAGE; 2153 case -ENOMEM: 2154 return VM_FAULT_OOM; 2155 case -ENOSPC: 2156 case -EFAULT: 2157 return VM_FAULT_SIGBUS; 2158 default: 2159 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2160 return VM_FAULT_SIGBUS; 2161 } 2162 } 2163 2164 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2165 { 2166 struct i915_vma *vma; 2167 2168 GEM_BUG_ON(!obj->userfault_count); 2169 2170 obj->userfault_count = 0; 2171 list_del(&obj->userfault_link); 2172 drm_vma_node_unmap(&obj->base.vma_node, 2173 obj->base.dev->anon_inode->i_mapping); 2174 2175 for_each_ggtt_vma(vma, obj) 2176 i915_vma_unset_userfault(vma); 2177 } 2178 2179 /** 2180 * i915_gem_release_mmap - remove physical page mappings 2181 * @obj: obj in question 2182 * 2183 * Preserve the reservation of the mmapping with the DRM core code, but 2184 * relinquish ownership of the pages back to the system. 2185 * 2186 * It is vital that we remove the page mapping if we have mapped a tiled 2187 * object through the GTT and then lose the fence register due to 2188 * resource pressure. Similarly if the object has been moved out of the 2189 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2190 * mapping will then trigger a page fault on the next user access, allowing 2191 * fixup by i915_gem_fault(). 2192 */ 2193 void 2194 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2195 { 2196 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2197 2198 /* Serialisation between user GTT access and our code depends upon 2199 * revoking the CPU's PTE whilst the mutex is held. The next user 2200 * pagefault then has to wait until we release the mutex. 2201 * 2202 * Note that RPM complicates somewhat by adding an additional 2203 * requirement that operations to the GGTT be made holding the RPM 2204 * wakeref. 2205 */ 2206 lockdep_assert_held(&i915->drm.struct_mutex); 2207 intel_runtime_pm_get(i915); 2208 2209 if (!obj->userfault_count) 2210 goto out; 2211 2212 __i915_gem_object_release_mmap(obj); 2213 2214 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2215 * memory transactions from userspace before we return. The TLB 2216 * flushing implied above by changing the PTE above *should* be 2217 * sufficient, an extra barrier here just provides us with a bit 2218 * of paranoid documentation about our requirement to serialise 2219 * memory writes before touching registers / GSM. 2220 */ 2221 wmb(); 2222 2223 out: 2224 intel_runtime_pm_put(i915); 2225 } 2226 2227 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2228 { 2229 struct drm_i915_gem_object *obj, *on; 2230 int i; 2231 2232 /* 2233 * Only called during RPM suspend. All users of the userfault_list 2234 * must be holding an RPM wakeref to ensure that this can not 2235 * run concurrently with themselves (and use the struct_mutex for 2236 * protection between themselves). 2237 */ 2238 2239 list_for_each_entry_safe(obj, on, 2240 &dev_priv->mm.userfault_list, userfault_link) 2241 __i915_gem_object_release_mmap(obj); 2242 2243 /* The fence will be lost when the device powers down. If any were 2244 * in use by hardware (i.e. they are pinned), we should not be powering 2245 * down! All other fences will be reacquired by the user upon waking. 2246 */ 2247 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2248 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2249 2250 /* Ideally we want to assert that the fence register is not 2251 * live at this point (i.e. that no piece of code will be 2252 * trying to write through fence + GTT, as that both violates 2253 * our tracking of activity and associated locking/barriers, 2254 * but also is illegal given that the hw is powered down). 2255 * 2256 * Previously we used reg->pin_count as a "liveness" indicator. 2257 * That is not sufficient, and we need a more fine-grained 2258 * tool if we want to have a sanity check here. 
2259 */ 2260 2261 if (!reg->vma) 2262 continue; 2263 2264 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2265 reg->dirty = true; 2266 } 2267 } 2268 2269 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2270 { 2271 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2272 int err; 2273 2274 err = drm_gem_create_mmap_offset(&obj->base); 2275 if (likely(!err)) 2276 return 0; 2277 2278 /* Attempt to reap some mmap space from dead objects */ 2279 do { 2280 err = i915_gem_wait_for_idle(dev_priv, 2281 I915_WAIT_INTERRUPTIBLE, 2282 MAX_SCHEDULE_TIMEOUT); 2283 if (err) 2284 break; 2285 2286 i915_gem_drain_freed_objects(dev_priv); 2287 err = drm_gem_create_mmap_offset(&obj->base); 2288 if (!err) 2289 break; 2290 2291 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2292 2293 return err; 2294 } 2295 2296 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2297 { 2298 drm_gem_free_mmap_offset(&obj->base); 2299 } 2300 2301 int 2302 i915_gem_mmap_gtt(struct drm_file *file, 2303 struct drm_device *dev, 2304 uint32_t handle, 2305 uint64_t *offset) 2306 { 2307 struct drm_i915_gem_object *obj; 2308 int ret; 2309 2310 obj = i915_gem_object_lookup(file, handle); 2311 if (!obj) 2312 return -ENOENT; 2313 2314 ret = i915_gem_object_create_mmap_offset(obj); 2315 if (ret == 0) 2316 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2317 2318 i915_gem_object_put(obj); 2319 return ret; 2320 } 2321 2322 /** 2323 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2324 * @dev: DRM device 2325 * @data: GTT mapping ioctl data 2326 * @file: GEM object info 2327 * 2328 * Simply returns the fake offset to userspace so it can mmap it. 2329 * The mmap call will end up in drm_gem_mmap(), which will set things 2330 * up so we can get faults in the handler above. 2331 * 2332 * The fault handler will take care of binding the object into the GTT 2333 * (since it may have been evicted to make room for something), allocating 2334 * a fence register, and mapping the appropriate aperture address into 2335 * userspace. 2336 */ 2337 int 2338 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2339 struct drm_file *file) 2340 { 2341 struct drm_i915_gem_mmap_gtt *args = data; 2342 2343 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2344 } 2345 2346 /* Immediately discard the backing storage */ 2347 static void 2348 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2349 { 2350 i915_gem_object_free_mmap_offset(obj); 2351 2352 if (obj->base.filp == NULL) 2353 return; 2354 2355 /* Our goal here is to return as much of the memory as 2356 * is possible back to the system as we are called from OOM. 2357 * To do this we must instruct the shmfs to drop all of its 2358 * backing pages, *now*. 
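 *
 * The usual route here is that userspace marked the buffer purgeable and
 * the shrinker then truncated it under memory pressure. A rough sketch of
 * that userspace side (helper name illustrative, error handling trimmed,
 * assuming the uapi from <drm/i915_drm.h>):
 *
 *    #include <stdint.h>
 *    #include <sys/ioctl.h>
 *    #include <drm/i915_drm.h>
 *
 *    // reuse=0 marks the bo purgeable, reuse=1 reclaims it; a return of
 *    // 0 means the kernel already dropped the backing pages.
 *    static int bo_madvise(int fd, uint32_t handle, int reuse)
 *    {
 *            struct drm_i915_gem_madvise arg = {
 *                    .handle = handle,
 *                    .madv = reuse ? I915_MADV_WILLNEED : I915_MADV_DONTNEED,
 *            };
 *
 *            if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
 *                    return -1;
 *
 *            return arg.retained;
 *    }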
2359 */ 2360 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2361 obj->mm.madv = __I915_MADV_PURGED; 2362 obj->mm.pages = ERR_PTR(-EFAULT); 2363 } 2364 2365 /* Try to discard unwanted pages */ 2366 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2367 { 2368 struct address_space *mapping; 2369 2370 lockdep_assert_held(&obj->mm.lock); 2371 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2372 2373 switch (obj->mm.madv) { 2374 case I915_MADV_DONTNEED: 2375 i915_gem_object_truncate(obj); 2376 case __I915_MADV_PURGED: 2377 return; 2378 } 2379 2380 if (obj->base.filp == NULL) 2381 return; 2382 2383 mapping = obj->base.filp->f_mapping, 2384 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2385 } 2386 2387 static void 2388 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2389 struct sg_table *pages) 2390 { 2391 struct sgt_iter sgt_iter; 2392 struct page *page; 2393 2394 __i915_gem_object_release_shmem(obj, pages, true); 2395 2396 i915_gem_gtt_finish_pages(obj, pages); 2397 2398 if (i915_gem_object_needs_bit17_swizzle(obj)) 2399 i915_gem_object_save_bit_17_swizzle(obj, pages); 2400 2401 for_each_sgt_page(page, sgt_iter, pages) { 2402 if (obj->mm.dirty) 2403 set_page_dirty(page); 2404 2405 if (obj->mm.madv == I915_MADV_WILLNEED) 2406 mark_page_accessed(page); 2407 2408 put_page(page); 2409 } 2410 obj->mm.dirty = false; 2411 2412 sg_free_table(pages); 2413 kfree(pages); 2414 } 2415 2416 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2417 { 2418 struct radix_tree_iter iter; 2419 void __rcu **slot; 2420 2421 rcu_read_lock(); 2422 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2423 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2424 rcu_read_unlock(); 2425 } 2426 2427 static struct sg_table * 2428 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2429 { 2430 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2431 struct sg_table *pages; 2432 2433 pages = fetch_and_zero(&obj->mm.pages); 2434 if (!pages) 2435 return NULL; 2436 2437 spin_lock(&i915->mm.obj_lock); 2438 list_del(&obj->mm.link); 2439 spin_unlock(&i915->mm.obj_lock); 2440 2441 if (obj->mm.mapping) { 2442 void *ptr; 2443 2444 ptr = page_mask_bits(obj->mm.mapping); 2445 if (is_vmalloc_addr(ptr)) 2446 vunmap(ptr); 2447 else 2448 kunmap(kmap_to_page(ptr)); 2449 2450 obj->mm.mapping = NULL; 2451 } 2452 2453 __i915_gem_object_reset_page_iter(obj); 2454 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2455 2456 return pages; 2457 } 2458 2459 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2460 enum i915_mm_subclass subclass) 2461 { 2462 struct sg_table *pages; 2463 2464 if (i915_gem_object_has_pinned_pages(obj)) 2465 return; 2466 2467 GEM_BUG_ON(obj->bind_count); 2468 if (!i915_gem_object_has_pages(obj)) 2469 return; 2470 2471 /* May be called by shrinker from within get_pages() (on another bo) */ 2472 mutex_lock_nested(&obj->mm.lock, subclass); 2473 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2474 goto unlock; 2475 2476 /* 2477 * ->put_pages might need to allocate memory for the bit17 swizzle 2478 * array, hence protect them from being reaped by removing them from gtt 2479 * lists early. 
2480 */ 2481 pages = __i915_gem_object_unset_pages(obj); 2482 if (!IS_ERR(pages)) 2483 obj->ops->put_pages(obj, pages); 2484 2485 unlock: 2486 mutex_unlock(&obj->mm.lock); 2487 } 2488 2489 static bool i915_sg_trim(struct sg_table *orig_st) 2490 { 2491 struct sg_table new_st; 2492 struct scatterlist *sg, *new_sg; 2493 unsigned int i; 2494 2495 if (orig_st->nents == orig_st->orig_nents) 2496 return false; 2497 2498 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2499 return false; 2500 2501 new_sg = new_st.sgl; 2502 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2503 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2504 /* called before being DMA mapped, no need to copy sg->dma_* */ 2505 new_sg = sg_next(new_sg); 2506 } 2507 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2508 2509 sg_free_table(orig_st); 2510 2511 *orig_st = new_st; 2512 return true; 2513 } 2514 2515 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2516 { 2517 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2518 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2519 unsigned long i; 2520 struct address_space *mapping; 2521 struct sg_table *st; 2522 struct scatterlist *sg; 2523 struct sgt_iter sgt_iter; 2524 struct page *page; 2525 unsigned long last_pfn = 0; /* suppress gcc warning */ 2526 unsigned int max_segment = i915_sg_segment_size(); 2527 unsigned int sg_page_sizes; 2528 gfp_t noreclaim; 2529 int ret; 2530 2531 /* Assert that the object is not currently in any GPU domain. As it 2532 * wasn't in the GTT, there shouldn't be any way it could have been in 2533 * a GPU cache 2534 */ 2535 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2536 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2537 2538 st = kmalloc(sizeof(*st), GFP_KERNEL); 2539 if (st == NULL) 2540 return -ENOMEM; 2541 2542 rebuild_st: 2543 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2544 kfree(st); 2545 return -ENOMEM; 2546 } 2547 2548 /* Get the list of pages out of our struct file. They'll be pinned 2549 * at this point until we release them. 2550 * 2551 * Fail silently without starting the shrinker 2552 */ 2553 mapping = obj->base.filp->f_mapping; 2554 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2555 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2556 2557 sg = st->sgl; 2558 st->nents = 0; 2559 sg_page_sizes = 0; 2560 for (i = 0; i < page_count; i++) { 2561 const unsigned int shrink[] = { 2562 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2563 0, 2564 }, *s = shrink; 2565 gfp_t gfp = noreclaim; 2566 2567 do { 2568 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2569 if (likely(!IS_ERR(page))) 2570 break; 2571 2572 if (!*s) { 2573 ret = PTR_ERR(page); 2574 goto err_sg; 2575 } 2576 2577 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2578 cond_resched(); 2579 2580 /* We've tried hard to allocate the memory by reaping 2581 * our own buffer, now let the real VM do its job and 2582 * go down in flames if truly OOM. 2583 * 2584 * However, since graphics tend to be disposable, 2585 * defer the oom here by reporting the ENOMEM back 2586 * to userspace. 2587 */ 2588 if (!*s) { 2589 /* reclaim and warn, but no oom */ 2590 gfp = mapping_gfp_mask(mapping); 2591 2592 /* Our bo are always dirty and so we require 2593 * kswapd to reclaim our pages (direct reclaim 2594 * does not effectively begin pageout of our 2595 * buffers on its own). 
However, direct reclaim 2596 * only waits for kswapd when under allocation 2597 * congestion. So as a result __GFP_RECLAIM is 2598 * unreliable and fails to actually reclaim our 2599 * dirty pages -- unless you try over and over 2600 * again with !__GFP_NORETRY. However, we still 2601 * want to fail this allocation rather than 2602 * trigger the out-of-memory killer and for 2603 * this we want __GFP_RETRY_MAYFAIL. 2604 */ 2605 gfp |= __GFP_RETRY_MAYFAIL; 2606 } 2607 } while (1); 2608 2609 if (!i || 2610 sg->length >= max_segment || 2611 page_to_pfn(page) != last_pfn + 1) { 2612 if (i) { 2613 sg_page_sizes |= sg->length; 2614 sg = sg_next(sg); 2615 } 2616 st->nents++; 2617 sg_set_page(sg, page, PAGE_SIZE, 0); 2618 } else { 2619 sg->length += PAGE_SIZE; 2620 } 2621 last_pfn = page_to_pfn(page); 2622 2623 /* Check that the i965g/gm workaround works. */ 2624 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2625 } 2626 if (sg) { /* loop terminated early; short sg table */ 2627 sg_page_sizes |= sg->length; 2628 sg_mark_end(sg); 2629 } 2630 2631 /* Trim unused sg entries to avoid wasting memory. */ 2632 i915_sg_trim(st); 2633 2634 ret = i915_gem_gtt_prepare_pages(obj, st); 2635 if (ret) { 2636 /* DMA remapping failed? One possible cause is that 2637 * it could not reserve enough large entries, asking 2638 * for PAGE_SIZE chunks instead may be helpful. 2639 */ 2640 if (max_segment > PAGE_SIZE) { 2641 for_each_sgt_page(page, sgt_iter, st) 2642 put_page(page); 2643 sg_free_table(st); 2644 2645 max_segment = PAGE_SIZE; 2646 goto rebuild_st; 2647 } else { 2648 dev_warn(&dev_priv->drm.pdev->dev, 2649 "Failed to DMA remap %lu pages\n", 2650 page_count); 2651 goto err_pages; 2652 } 2653 } 2654 2655 if (i915_gem_object_needs_bit17_swizzle(obj)) 2656 i915_gem_object_do_bit_17_swizzle(obj, st); 2657 2658 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2659 2660 return 0; 2661 2662 err_sg: 2663 sg_mark_end(sg); 2664 err_pages: 2665 for_each_sgt_page(page, sgt_iter, st) 2666 put_page(page); 2667 sg_free_table(st); 2668 kfree(st); 2669 2670 /* shmemfs first checks if there is enough memory to allocate the page 2671 * and reports ENOSPC should there be insufficient, along with the usual 2672 * ENOMEM for a genuine allocation failure. 2673 * 2674 * We use ENOSPC in our driver to mean that we have run out of aperture 2675 * space and so want to translate the error from shmemfs back to our 2676 * usual understanding of ENOMEM. 2677 */ 2678 if (ret == -ENOSPC) 2679 ret = -ENOMEM; 2680 2681 return ret; 2682 } 2683 2684 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2685 struct sg_table *pages, 2686 unsigned int sg_page_sizes) 2687 { 2688 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2689 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2690 int i; 2691 2692 lockdep_assert_held(&obj->mm.lock); 2693 2694 obj->mm.get_page.sg_pos = pages->sgl; 2695 obj->mm.get_page.sg_idx = 0; 2696 2697 obj->mm.pages = pages; 2698 2699 if (i915_gem_object_is_tiled(obj) && 2700 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2701 GEM_BUG_ON(obj->mm.quirked); 2702 __i915_gem_object_pin_pages(obj); 2703 obj->mm.quirked = true; 2704 } 2705 2706 GEM_BUG_ON(!sg_page_sizes); 2707 obj->mm.page_sizes.phys = sg_page_sizes; 2708 2709 /* 2710 * Calculate the supported page-sizes which fit into the given 2711 * sg_page_sizes. This will give us the page-sizes which we may be able 2712 * to use opportunistically when later inserting into the GTT. 
For 2713 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2714 * 64K or 4K pages, although in practice this will depend on a number of 2715 * other factors. 2716 */ 2717 obj->mm.page_sizes.sg = 0; 2718 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2719 if (obj->mm.page_sizes.phys & ~0u << i) 2720 obj->mm.page_sizes.sg |= BIT(i); 2721 } 2722 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2723 2724 spin_lock(&i915->mm.obj_lock); 2725 list_add(&obj->mm.link, &i915->mm.unbound_list); 2726 spin_unlock(&i915->mm.obj_lock); 2727 } 2728 2729 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2730 { 2731 int err; 2732 2733 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2734 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2735 return -EFAULT; 2736 } 2737 2738 err = obj->ops->get_pages(obj); 2739 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2740 2741 return err; 2742 } 2743 2744 /* Ensure that the associated pages are gathered from the backing storage 2745 * and pinned into our object. i915_gem_object_pin_pages() may be called 2746 * multiple times before they are released by a single call to 2747 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2748 * either as a result of memory pressure (reaping pages under the shrinker) 2749 * or as the object is itself released. 2750 */ 2751 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2752 { 2753 int err; 2754 2755 err = mutex_lock_interruptible(&obj->mm.lock); 2756 if (err) 2757 return err; 2758 2759 if (unlikely(!i915_gem_object_has_pages(obj))) { 2760 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2761 2762 err = ____i915_gem_object_get_pages(obj); 2763 if (err) 2764 goto unlock; 2765 2766 smp_mb__before_atomic(); 2767 } 2768 atomic_inc(&obj->mm.pages_pin_count); 2769 2770 unlock: 2771 mutex_unlock(&obj->mm.lock); 2772 return err; 2773 } 2774 2775 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2776 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2777 enum i915_map_type type) 2778 { 2779 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2780 struct sg_table *sgt = obj->mm.pages; 2781 struct sgt_iter sgt_iter; 2782 struct page *page; 2783 struct page *stack_pages[32]; 2784 struct page **pages = stack_pages; 2785 unsigned long i = 0; 2786 pgprot_t pgprot; 2787 void *addr; 2788 2789 /* A single page can always be kmapped */ 2790 if (n_pages == 1 && type == I915_MAP_WB) 2791 return kmap(sg_page(sgt->sgl)); 2792 2793 if (n_pages > ARRAY_SIZE(stack_pages)) { 2794 /* Too big for stack -- allocate temporary array instead */ 2795 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2796 if (!pages) 2797 return NULL; 2798 } 2799 2800 for_each_sgt_page(page, sgt_iter, sgt) 2801 pages[i++] = page; 2802 2803 /* Check that we have the expected number of pages */ 2804 GEM_BUG_ON(i != n_pages); 2805 2806 switch (type) { 2807 default: 2808 MISSING_CASE(type); 2809 /* fallthrough to use PAGE_KERNEL anyway */ 2810 case I915_MAP_WB: 2811 pgprot = PAGE_KERNEL; 2812 break; 2813 case I915_MAP_WC: 2814 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2815 break; 2816 } 2817 addr = vmap(pages, n_pages, 0, pgprot); 2818 2819 if (pages != stack_pages) 2820 kvfree(pages); 2821 2822 return addr; 2823 } 2824 2825 /* get, pin, and map the pages of the object into kernel space */ 2826 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2827 enum i915_map_type type) 2828 { 2829 enum 
i915_map_type has_type; 2830 bool pinned; 2831 void *ptr; 2832 int ret; 2833 2834 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2835 return ERR_PTR(-ENXIO); 2836 2837 ret = mutex_lock_interruptible(&obj->mm.lock); 2838 if (ret) 2839 return ERR_PTR(ret); 2840 2841 pinned = !(type & I915_MAP_OVERRIDE); 2842 type &= ~I915_MAP_OVERRIDE; 2843 2844 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2845 if (unlikely(!i915_gem_object_has_pages(obj))) { 2846 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2847 2848 ret = ____i915_gem_object_get_pages(obj); 2849 if (ret) 2850 goto err_unlock; 2851 2852 smp_mb__before_atomic(); 2853 } 2854 atomic_inc(&obj->mm.pages_pin_count); 2855 pinned = false; 2856 } 2857 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2858 2859 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2860 if (ptr && has_type != type) { 2861 if (pinned) { 2862 ret = -EBUSY; 2863 goto err_unpin; 2864 } 2865 2866 if (is_vmalloc_addr(ptr)) 2867 vunmap(ptr); 2868 else 2869 kunmap(kmap_to_page(ptr)); 2870 2871 ptr = obj->mm.mapping = NULL; 2872 } 2873 2874 if (!ptr) { 2875 ptr = i915_gem_object_map(obj, type); 2876 if (!ptr) { 2877 ret = -ENOMEM; 2878 goto err_unpin; 2879 } 2880 2881 obj->mm.mapping = page_pack_bits(ptr, type); 2882 } 2883 2884 out_unlock: 2885 mutex_unlock(&obj->mm.lock); 2886 return ptr; 2887 2888 err_unpin: 2889 atomic_dec(&obj->mm.pages_pin_count); 2890 err_unlock: 2891 ptr = ERR_PTR(ret); 2892 goto out_unlock; 2893 } 2894 2895 static int 2896 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2897 const struct drm_i915_gem_pwrite *arg) 2898 { 2899 struct address_space *mapping = obj->base.filp->f_mapping; 2900 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2901 u64 remain, offset; 2902 unsigned int pg; 2903 2904 /* Before we instantiate/pin the backing store for our use, we 2905 * can prepopulate the shmemfs filp efficiently using a write into 2906 * the pagecache. We avoid the penalty of instantiating all the 2907 * pages, important if the user is just writing to a few and never 2908 * uses the object on the GPU, and using a direct write into shmemfs 2909 * allows it to avoid the cost of retrieving a page (either swapin 2910 * or clearing-before-use) before it is overwritten. 2911 */ 2912 if (i915_gem_object_has_pages(obj)) 2913 return -ENODEV; 2914 2915 if (obj->mm.madv != I915_MADV_WILLNEED) 2916 return -EFAULT; 2917 2918 /* Before the pages are instantiated the object is treated as being 2919 * in the CPU domain. The pages will be clflushed as required before 2920 * use, and we can freely write into the pages directly. If userspace 2921 * races pwrite with any other operation; corruption will ensue - 2922 * that is userspace's prerogative! 
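 *
 * For reference, the userspace half of this fast path is nothing more than
 * a pwrite into a freshly created, never-mapped buffer. A rough sketch
 * (helper name illustrative, error handling trimmed, assuming the uapi
 * from <drm/i915_drm.h>):
 *
 *    #include <stdint.h>
 *    #include <sys/ioctl.h>
 *    #include <drm/i915_drm.h>
 *
 *    static uint32_t bo_create_and_fill(int fd, const void *data,
 *                                       uint64_t len)
 *    {
 *            struct drm_i915_gem_create create = { .size = len };
 *
 *            if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
 *                    return 0;
 *
 *            struct drm_i915_gem_pwrite pwrite = {
 *                    .handle = create.handle,
 *                    .size = len,
 *                    .data_ptr = (uint64_t)(uintptr_t)data,
 *            };
 *
 *            // With no pages instantiated yet, the write is intended to
 *            // go straight into the shmemfs pagecache as described above.
 *            ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
 *            return create.handle;
 *    }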
2923 */ 2924 2925 remain = arg->size; 2926 offset = arg->offset; 2927 pg = offset_in_page(offset); 2928 2929 do { 2930 unsigned int len, unwritten; 2931 struct page *page; 2932 void *data, *vaddr; 2933 int err; 2934 2935 len = PAGE_SIZE - pg; 2936 if (len > remain) 2937 len = remain; 2938 2939 err = pagecache_write_begin(obj->base.filp, mapping, 2940 offset, len, 0, 2941 &page, &data); 2942 if (err < 0) 2943 return err; 2944 2945 vaddr = kmap(page); 2946 unwritten = copy_from_user(vaddr + pg, user_data, len); 2947 kunmap(page); 2948 2949 err = pagecache_write_end(obj->base.filp, mapping, 2950 offset, len, len - unwritten, 2951 page, data); 2952 if (err < 0) 2953 return err; 2954 2955 if (unwritten) 2956 return -EFAULT; 2957 2958 remain -= len; 2959 user_data += len; 2960 offset += len; 2961 pg = 0; 2962 } while (remain); 2963 2964 return 0; 2965 } 2966 2967 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 2968 const struct i915_gem_context *ctx) 2969 { 2970 unsigned int score; 2971 unsigned long prev_hang; 2972 2973 if (i915_gem_context_is_banned(ctx)) 2974 score = I915_CLIENT_SCORE_CONTEXT_BAN; 2975 else 2976 score = 0; 2977 2978 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 2979 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 2980 score += I915_CLIENT_SCORE_HANG_FAST; 2981 2982 if (score) { 2983 atomic_add(score, &file_priv->ban_score); 2984 2985 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 2986 ctx->name, score, 2987 atomic_read(&file_priv->ban_score)); 2988 } 2989 } 2990 2991 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2992 { 2993 unsigned int score; 2994 bool banned, bannable; 2995 2996 atomic_inc(&ctx->guilty_count); 2997 2998 bannable = i915_gem_context_is_bannable(ctx); 2999 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3000 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 3001 3002 /* Cool contexts don't accumulate client ban score */ 3003 if (!bannable) 3004 return; 3005 3006 if (banned) { 3007 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3008 ctx->name, atomic_read(&ctx->guilty_count), 3009 score); 3010 i915_gem_context_set_banned(ctx); 3011 } 3012 3013 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3014 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3015 } 3016 3017 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3018 { 3019 atomic_inc(&ctx->active_count); 3020 } 3021 3022 struct i915_request * 3023 i915_gem_find_active_request(struct intel_engine_cs *engine) 3024 { 3025 struct i915_request *request, *active = NULL; 3026 unsigned long flags; 3027 3028 /* 3029 * We are called by the error capture, reset and to dump engine 3030 * state at random points in time. In particular, note that neither is 3031 * crucially ordered with an interrupt. After a hang, the GPU is dead 3032 * and we assume that no more writes can happen (we waited long enough 3033 * for all writes that were in transaction to be flushed) - adding an 3034 * extra delay for a recent interrupt is pointless. Hence, we do 3035 * not need an engine->irq_seqno_barrier() before the seqno reads. 3036 * At all other times, we must assume the GPU is still running, but 3037 * we only care about the snapshot of this moment. 
3038 */ 3039 spin_lock_irqsave(&engine->timeline.lock, flags); 3040 list_for_each_entry(request, &engine->timeline.requests, link) { 3041 if (__i915_request_completed(request, request->global_seqno)) 3042 continue; 3043 3044 active = request; 3045 break; 3046 } 3047 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3048 3049 return active; 3050 } 3051 3052 /* 3053 * Ensure irq handler finishes, and not run again. 3054 * Also return the active request so that we only search for it once. 3055 */ 3056 struct i915_request * 3057 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3058 { 3059 struct i915_request *request; 3060 3061 /* 3062 * During the reset sequence, we must prevent the engine from 3063 * entering RC6. As the context state is undefined until we restart 3064 * the engine, if it does enter RC6 during the reset, the state 3065 * written to the powercontext is undefined and so we may lose 3066 * GPU state upon resume, i.e. fail to restart after a reset. 3067 */ 3068 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3069 3070 request = engine->reset.prepare(engine); 3071 if (request && request->fence.error == -EIO) 3072 request = ERR_PTR(-EIO); /* Previous reset failed! */ 3073 3074 return request; 3075 } 3076 3077 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3078 { 3079 struct intel_engine_cs *engine; 3080 struct i915_request *request; 3081 enum intel_engine_id id; 3082 int err = 0; 3083 3084 for_each_engine(engine, dev_priv, id) { 3085 request = i915_gem_reset_prepare_engine(engine); 3086 if (IS_ERR(request)) { 3087 err = PTR_ERR(request); 3088 continue; 3089 } 3090 3091 engine->hangcheck.active_request = request; 3092 } 3093 3094 i915_gem_revoke_fences(dev_priv); 3095 intel_uc_sanitize(dev_priv); 3096 3097 return err; 3098 } 3099 3100 static void engine_skip_context(struct i915_request *request) 3101 { 3102 struct intel_engine_cs *engine = request->engine; 3103 struct i915_gem_context *hung_ctx = request->gem_context; 3104 struct i915_timeline *timeline = request->timeline; 3105 unsigned long flags; 3106 3107 GEM_BUG_ON(timeline == &engine->timeline); 3108 3109 spin_lock_irqsave(&engine->timeline.lock, flags); 3110 spin_lock(&timeline->lock); 3111 3112 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3113 if (request->gem_context == hung_ctx) 3114 i915_request_skip(request, -EIO); 3115 3116 list_for_each_entry(request, &timeline->requests, link) 3117 i915_request_skip(request, -EIO); 3118 3119 spin_unlock(&timeline->lock); 3120 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3121 } 3122 3123 /* Returns the request if it was guilty of the hang */ 3124 static struct i915_request * 3125 i915_gem_reset_request(struct intel_engine_cs *engine, 3126 struct i915_request *request, 3127 bool stalled) 3128 { 3129 /* The guilty request will get skipped on a hung engine. 3130 * 3131 * Users of client default contexts do not rely on logical 3132 * state preserved between batches so it is safe to execute 3133 * queued requests following the hang. Non default contexts 3134 * rely on preserved state, so skipping a batch loses the 3135 * evolution of the state and it needs to be considered corrupted. 3136 * Executing more queued batches on top of corrupted state is 3137 * risky. But we take the risk by trying to advance through 3138 * the queued requests in order to make the client behaviour 3139 * more predictable around resets, by not throwing away random 3140 * amount of batches it has prepared for execution. 
Sophisticated 3141 * clients can use gem_reset_stats_ioctl and dma fence status 3142 * (exported via sync_file info ioctl on explicit fences) to observe 3143 * when it loses the context state and should rebuild accordingly. 3144 * 3145 * The context ban, and ultimately the client ban, mechanism are safety 3146 * valves if client submission ends up resulting in nothing more than 3147 * subsequent hangs. 3148 */ 3149 3150 if (i915_request_completed(request)) { 3151 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3152 engine->name, request->global_seqno, 3153 request->fence.context, request->fence.seqno, 3154 intel_engine_get_seqno(engine)); 3155 stalled = false; 3156 } 3157 3158 if (stalled) { 3159 i915_gem_context_mark_guilty(request->gem_context); 3160 i915_request_skip(request, -EIO); 3161 3162 /* If this context is now banned, skip all pending requests. */ 3163 if (i915_gem_context_is_banned(request->gem_context)) 3164 engine_skip_context(request); 3165 } else { 3166 /* 3167 * Since this is not the hung engine, it may have advanced 3168 * since the hang declaration. Double check by refinding 3169 * the active request at the time of the reset. 3170 */ 3171 request = i915_gem_find_active_request(engine); 3172 if (request) { 3173 unsigned long flags; 3174 3175 i915_gem_context_mark_innocent(request->gem_context); 3176 dma_fence_set_error(&request->fence, -EAGAIN); 3177 3178 /* Rewind the engine to replay the incomplete rq */ 3179 spin_lock_irqsave(&engine->timeline.lock, flags); 3180 request = list_prev_entry(request, link); 3181 if (&request->link == &engine->timeline.requests) 3182 request = NULL; 3183 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3184 } 3185 } 3186 3187 return request; 3188 } 3189 3190 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3191 struct i915_request *request, 3192 bool stalled) 3193 { 3194 /* 3195 * Make sure this write is visible before we re-enable the interrupt 3196 * handlers on another CPU, as tasklet_enable() resolves to just 3197 * a compiler barrier which is insufficient for our purpose here. 3198 */ 3199 smp_store_mb(engine->irq_posted, 0); 3200 3201 if (request) 3202 request = i915_gem_reset_request(engine, request, stalled); 3203 3204 /* Setup the CS to resume from the breadcrumb of the hung request */ 3205 engine->reset.reset(engine, request); 3206 } 3207 3208 void i915_gem_reset(struct drm_i915_private *dev_priv, 3209 unsigned int stalled_mask) 3210 { 3211 struct intel_engine_cs *engine; 3212 enum intel_engine_id id; 3213 3214 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3215 3216 i915_retire_requests(dev_priv); 3217 3218 for_each_engine(engine, dev_priv, id) { 3219 struct intel_context *ce; 3220 3221 i915_gem_reset_engine(engine, 3222 engine->hangcheck.active_request, 3223 stalled_mask & ENGINE_MASK(id)); 3224 ce = fetch_and_zero(&engine->last_retired_context); 3225 if (ce) 3226 intel_context_unpin(ce); 3227 3228 /* 3229 * Ostensibily, we always want a context loaded for powersaving, 3230 * so if the engine is idle after the reset, send a request 3231 * to load our scratch kernel_context. 3232 * 3233 * More mysteriously, if we leave the engine idle after a reset, 3234 * the next userspace batch may hang, with what appears to be 3235 * an incoherent read by the CS (presumably stale TLB). An 3236 * empty request appears sufficient to paper over the glitch. 
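 *
 * The "sophisticated clients" mentioned above can poll whether a reset
 * cost them anything; a rough sketch (helper name illustrative, assuming
 * the uapi from <drm/i915_drm.h>, with ctx_id 0 being the default
 * context):
 *
 *    #include <stdint.h>
 *    #include <sys/ioctl.h>
 *    #include <drm/i915_drm.h>
 *
 *    // batch_active counts hangs this context was guilty of,
 *    // batch_pending counts resets where its queued work was innocent
 *    // (cf. i915_gem_context_mark_guilty/innocent above).
 *    static int context_was_reset(int fd, uint32_t ctx_id)
 *    {
 *            struct drm_i915_reset_stats stats = { .ctx_id = ctx_id };
 *
 *            if (ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
 *                    return -1;
 *
 *            return stats.batch_active + stats.batch_pending != 0;
 *    }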
3237 */ 3238 if (intel_engine_is_idle(engine)) { 3239 struct i915_request *rq; 3240 3241 rq = i915_request_alloc(engine, 3242 dev_priv->kernel_context); 3243 if (!IS_ERR(rq)) 3244 i915_request_add(rq); 3245 } 3246 } 3247 3248 i915_gem_restore_fences(dev_priv); 3249 } 3250 3251 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3252 { 3253 engine->reset.finish(engine); 3254 3255 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3256 } 3257 3258 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3259 { 3260 struct intel_engine_cs *engine; 3261 enum intel_engine_id id; 3262 3263 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3264 3265 for_each_engine(engine, dev_priv, id) { 3266 engine->hangcheck.active_request = NULL; 3267 i915_gem_reset_finish_engine(engine); 3268 } 3269 } 3270 3271 static void nop_submit_request(struct i915_request *request) 3272 { 3273 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3274 request->engine->name, 3275 request->fence.context, request->fence.seqno); 3276 dma_fence_set_error(&request->fence, -EIO); 3277 3278 i915_request_submit(request); 3279 } 3280 3281 static void nop_complete_submit_request(struct i915_request *request) 3282 { 3283 unsigned long flags; 3284 3285 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3286 request->engine->name, 3287 request->fence.context, request->fence.seqno); 3288 dma_fence_set_error(&request->fence, -EIO); 3289 3290 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3291 __i915_request_submit(request); 3292 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3293 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3294 } 3295 3296 void i915_gem_set_wedged(struct drm_i915_private *i915) 3297 { 3298 struct intel_engine_cs *engine; 3299 enum intel_engine_id id; 3300 3301 GEM_TRACE("start\n"); 3302 3303 if (GEM_SHOW_DEBUG()) { 3304 struct drm_printer p = drm_debug_printer(__func__); 3305 3306 for_each_engine(engine, i915, id) 3307 intel_engine_dump(engine, &p, "%s\n", engine->name); 3308 } 3309 3310 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3311 smp_mb__after_atomic(); 3312 3313 /* 3314 * First, stop submission to hw, but do not yet complete requests by 3315 * rolling the global seqno forward (since this would complete requests 3316 * for which we haven't set the fence error to EIO yet). 3317 */ 3318 for_each_engine(engine, i915, id) { 3319 i915_gem_reset_prepare_engine(engine); 3320 3321 engine->submit_request = nop_submit_request; 3322 engine->schedule = NULL; 3323 } 3324 i915->caps.scheduler = 0; 3325 3326 /* Even if the GPU reset fails, it should still stop the engines */ 3327 intel_gpu_reset(i915, ALL_ENGINES); 3328 3329 /* 3330 * Make sure no one is running the old callback before we proceed with 3331 * cancelling requests and resetting the completion tracking. Otherwise 3332 * we might submit a request to the hardware which never completes. 3333 */ 3334 synchronize_rcu(); 3335 3336 for_each_engine(engine, i915, id) { 3337 /* Mark all executing requests as skipped */ 3338 engine->cancel_requests(engine); 3339 3340 /* 3341 * Only once we've force-cancelled all in-flight requests can we 3342 * start to complete all requests. 3343 */ 3344 engine->submit_request = nop_complete_submit_request; 3345 } 3346 3347 /* 3348 * Make sure no request can slip through without getting completed by 3349 * either this call here to intel_engine_init_global_seqno, or the one 3350 * in nop_complete_submit_request. 
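 *
 * A client using explicit fencing can see these -EIO completions without
 * any i915-specific query, via the sync_file info ioctl on its out-fence
 * fd, as noted above. A rough sketch (assuming <linux/sync_file.h>):
 *
 *    #include <string.h>
 *    #include <sys/ioctl.h>
 *    #include <linux/sync_file.h>
 *
 *    // 1: signalled ok, 0: still active, negative: error (either the
 *    // fence error, e.g. -EIO after wedging, or -1 if the query failed).
 *    static int fence_status(int fence_fd)
 *    {
 *            struct sync_file_info info;
 *
 *            memset(&info, 0, sizeof(info));
 *            if (ioctl(fence_fd, SYNC_IOC_FILE_INFO, &info))
 *                    return -1;
 *
 *            return info.status;
 *    }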
3351 */ 3352 synchronize_rcu(); 3353 3354 for_each_engine(engine, i915, id) { 3355 unsigned long flags; 3356 3357 /* 3358 * Mark all pending requests as complete so that any concurrent 3359 * (lockless) lookup doesn't try and wait upon the request as we 3360 * reset it. 3361 */ 3362 spin_lock_irqsave(&engine->timeline.lock, flags); 3363 intel_engine_init_global_seqno(engine, 3364 intel_engine_last_submit(engine)); 3365 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3366 3367 i915_gem_reset_finish_engine(engine); 3368 } 3369 3370 GEM_TRACE("end\n"); 3371 3372 wake_up_all(&i915->gpu_error.reset_queue); 3373 } 3374 3375 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3376 { 3377 struct i915_timeline *tl; 3378 3379 lockdep_assert_held(&i915->drm.struct_mutex); 3380 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3381 return true; 3382 3383 GEM_TRACE("start\n"); 3384 3385 /* 3386 * Before unwedging, make sure that all pending operations 3387 * are flushed and errored out - we may have requests waiting upon 3388 * third party fences. We marked all inflight requests as EIO, and 3389 * every execbuf since returned EIO, for consistency we want all 3390 * the currently pending requests to also be marked as EIO, which 3391 * is done inside our nop_submit_request - and so we must wait. 3392 * 3393 * No more can be submitted until we reset the wedged bit. 3394 */ 3395 list_for_each_entry(tl, &i915->gt.timelines, link) { 3396 struct i915_request *rq; 3397 3398 rq = i915_gem_active_peek(&tl->last_request, 3399 &i915->drm.struct_mutex); 3400 if (!rq) 3401 continue; 3402 3403 /* 3404 * We can't use our normal waiter as we want to 3405 * avoid recursively trying to handle the current 3406 * reset. The basic dma_fence_default_wait() installs 3407 * a callback for dma_fence_signal(), which is 3408 * triggered by our nop handler (indirectly, the 3409 * callback enables the signaler thread which is 3410 * woken by the nop_submit_request() advancing the seqno 3411 * and when the seqno passes the fence, the signaler 3412 * then signals the fence waking us up). 3413 */ 3414 if (dma_fence_default_wait(&rq->fence, true, 3415 MAX_SCHEDULE_TIMEOUT) < 0) 3416 return false; 3417 } 3418 i915_retire_requests(i915); 3419 GEM_BUG_ON(i915->gt.active_requests); 3420 3421 /* 3422 * Undo nop_submit_request. We prevent all new i915 requests from 3423 * being queued (by disallowing execbuf whilst wedged) so having 3424 * waited for all active requests above, we know the system is idle 3425 * and do not have to worry about a thread being inside 3426 * engine->submit_request() as we swap over. So unlike installing 3427 * the nop_submit_request on reset, we can do this from normal 3428 * context and do not require stop_machine(). 3429 */ 3430 intel_engines_reset_default_submission(i915); 3431 i915_gem_contexts_lost(i915); 3432 3433 GEM_TRACE("end\n"); 3434 3435 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3436 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3437 3438 return true; 3439 } 3440 3441 static void 3442 i915_gem_retire_work_handler(struct work_struct *work) 3443 { 3444 struct drm_i915_private *dev_priv = 3445 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3446 struct drm_device *dev = &dev_priv->drm; 3447 3448 /* Come back later if the device is busy... */ 3449 if (mutex_trylock(&dev->struct_mutex)) { 3450 i915_retire_requests(dev_priv); 3451 mutex_unlock(&dev->struct_mutex); 3452 } 3453 3454 /* 3455 * Keep the retire handler running until we are finally idle. 
3456 * We do not need to do this test under locking as in the worst-case 3457 * we queue the retire worker once too often. 3458 */ 3459 if (READ_ONCE(dev_priv->gt.awake)) 3460 queue_delayed_work(dev_priv->wq, 3461 &dev_priv->gt.retire_work, 3462 round_jiffies_up_relative(HZ)); 3463 } 3464 3465 static void shrink_caches(struct drm_i915_private *i915) 3466 { 3467 /* 3468 * kmem_cache_shrink() discards empty slabs and reorders partially 3469 * filled slabs to prioritise allocating from the mostly full slabs, 3470 * with the aim of reducing fragmentation. 3471 */ 3472 kmem_cache_shrink(i915->priorities); 3473 kmem_cache_shrink(i915->dependencies); 3474 kmem_cache_shrink(i915->requests); 3475 kmem_cache_shrink(i915->luts); 3476 kmem_cache_shrink(i915->vmas); 3477 kmem_cache_shrink(i915->objects); 3478 } 3479 3480 struct sleep_rcu_work { 3481 union { 3482 struct rcu_head rcu; 3483 struct work_struct work; 3484 }; 3485 struct drm_i915_private *i915; 3486 unsigned int epoch; 3487 }; 3488 3489 static inline bool 3490 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3491 { 3492 /* 3493 * There is a small chance that the epoch wrapped since we started 3494 * sleeping. If we assume that epoch is at least a u32, then it will 3495 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3496 */ 3497 return epoch == READ_ONCE(i915->gt.epoch); 3498 } 3499 3500 static void __sleep_work(struct work_struct *work) 3501 { 3502 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3503 struct drm_i915_private *i915 = s->i915; 3504 unsigned int epoch = s->epoch; 3505 3506 kfree(s); 3507 if (same_epoch(i915, epoch)) 3508 shrink_caches(i915); 3509 } 3510 3511 static void __sleep_rcu(struct rcu_head *rcu) 3512 { 3513 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3514 struct drm_i915_private *i915 = s->i915; 3515 3516 if (same_epoch(i915, s->epoch)) { 3517 INIT_WORK(&s->work, __sleep_work); 3518 queue_work(i915->wq, &s->work); 3519 } else { 3520 kfree(s); 3521 } 3522 } 3523 3524 static inline bool 3525 new_requests_since_last_retire(const struct drm_i915_private *i915) 3526 { 3527 return (READ_ONCE(i915->gt.active_requests) || 3528 work_pending(&i915->gt.idle_work.work)); 3529 } 3530 3531 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3532 { 3533 struct intel_engine_cs *engine; 3534 enum intel_engine_id id; 3535 3536 if (i915_terminally_wedged(&i915->gpu_error)) 3537 return; 3538 3539 GEM_BUG_ON(i915->gt.active_requests); 3540 for_each_engine(engine, i915, id) { 3541 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3542 GEM_BUG_ON(engine->last_retired_context != 3543 to_intel_context(i915->kernel_context, engine)); 3544 } 3545 } 3546 3547 static void 3548 i915_gem_idle_work_handler(struct work_struct *work) 3549 { 3550 struct drm_i915_private *dev_priv = 3551 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3552 unsigned int epoch = I915_EPOCH_INVALID; 3553 bool rearm_hangcheck; 3554 3555 if (!READ_ONCE(dev_priv->gt.awake)) 3556 return; 3557 3558 if (READ_ONCE(dev_priv->gt.active_requests)) 3559 return; 3560 3561 /* 3562 * Flush out the last user context, leaving only the pinned 3563 * kernel context resident. When we are idling on the kernel_context, 3564 * no more new requests (with a context switch) are emitted and we 3565 * can finally rest. 
A consequence is that the idle work handler is 3566 * always called at least twice before idling (and if the system is 3567 * idle that implies a round trip through the retire worker). 3568 */ 3569 mutex_lock(&dev_priv->drm.struct_mutex); 3570 i915_gem_switch_to_kernel_context(dev_priv); 3571 mutex_unlock(&dev_priv->drm.struct_mutex); 3572 3573 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3574 READ_ONCE(dev_priv->gt.active_requests)); 3575 3576 /* 3577 * Wait for last execlists context complete, but bail out in case a 3578 * new request is submitted. As we don't trust the hardware, we 3579 * continue on if the wait times out. This is necessary to allow 3580 * the machine to suspend even if the hardware dies, and we will 3581 * try to recover in resume (after depriving the hardware of power, 3582 * it may be in a better mmod). 3583 */ 3584 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3585 intel_engines_are_idle(dev_priv), 3586 I915_IDLE_ENGINES_TIMEOUT * 1000, 3587 10, 500); 3588 3589 rearm_hangcheck = 3590 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3591 3592 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3593 /* Currently busy, come back later */ 3594 mod_delayed_work(dev_priv->wq, 3595 &dev_priv->gt.idle_work, 3596 msecs_to_jiffies(50)); 3597 goto out_rearm; 3598 } 3599 3600 /* 3601 * New request retired after this work handler started, extend active 3602 * period until next instance of the work. 3603 */ 3604 if (new_requests_since_last_retire(dev_priv)) 3605 goto out_unlock; 3606 3607 epoch = __i915_gem_park(dev_priv); 3608 3609 assert_kernel_context_is_current(dev_priv); 3610 3611 rearm_hangcheck = false; 3612 out_unlock: 3613 mutex_unlock(&dev_priv->drm.struct_mutex); 3614 3615 out_rearm: 3616 if (rearm_hangcheck) { 3617 GEM_BUG_ON(!dev_priv->gt.awake); 3618 i915_queue_hangcheck(dev_priv); 3619 } 3620 3621 /* 3622 * When we are idle, it is an opportune time to reap our caches. 3623 * However, we have many objects that utilise RCU and the ordered 3624 * i915->wq that this work is executing on. To try and flush any 3625 * pending frees now we are idle, we first wait for an RCU grace 3626 * period, and then queue a task (that will run last on the wq) to 3627 * shrink and re-optimize the caches. 3628 */ 3629 if (same_epoch(dev_priv, epoch)) { 3630 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3631 if (s) { 3632 s->i915 = dev_priv; 3633 s->epoch = epoch; 3634 call_rcu(&s->rcu, __sleep_rcu); 3635 } 3636 } 3637 } 3638 3639 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3640 { 3641 struct drm_i915_private *i915 = to_i915(gem->dev); 3642 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3643 struct drm_i915_file_private *fpriv = file->driver_priv; 3644 struct i915_lut_handle *lut, *ln; 3645 3646 mutex_lock(&i915->drm.struct_mutex); 3647 3648 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3649 struct i915_gem_context *ctx = lut->ctx; 3650 struct i915_vma *vma; 3651 3652 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3653 if (ctx->file_priv != fpriv) 3654 continue; 3655 3656 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3657 GEM_BUG_ON(vma->obj != obj); 3658 3659 /* We allow the process to have multiple handles to the same 3660 * vma, in the same fd namespace, by virtue of flink/open. 
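 *
 * (A note on the call_rcu() used by the idle handler above: the union in
 * struct sleep_rcu_work is safe because the rcu_head is finished with by
 * the time __sleep_rcu() runs, so its storage can be reused as the
 * work_struct. The extra bounce through i915->wq is needed because RCU
 * callbacks run in softirq context, where kmem_cache_shrink() cannot be
 * called, and queueing onto the ordered wq also lets any RCU-deferred
 * frees queued before us complete first.)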
3661 */ 3662 GEM_BUG_ON(!vma->open_count); 3663 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3664 i915_vma_close(vma); 3665 3666 list_del(&lut->obj_link); 3667 list_del(&lut->ctx_link); 3668 3669 kmem_cache_free(i915->luts, lut); 3670 __i915_gem_object_release_unless_active(obj); 3671 } 3672 3673 mutex_unlock(&i915->drm.struct_mutex); 3674 } 3675 3676 static unsigned long to_wait_timeout(s64 timeout_ns) 3677 { 3678 if (timeout_ns < 0) 3679 return MAX_SCHEDULE_TIMEOUT; 3680 3681 if (timeout_ns == 0) 3682 return 0; 3683 3684 return nsecs_to_jiffies_timeout(timeout_ns); 3685 } 3686 3687 /** 3688 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3689 * @dev: drm device pointer 3690 * @data: ioctl data blob 3691 * @file: drm file pointer 3692 * 3693 * Returns 0 if successful, else an error is returned with the remaining time in 3694 * the timeout parameter. 3695 * -ETIME: object is still busy after timeout 3696 * -ERESTARTSYS: signal interrupted the wait 3697 * -ENOENT: object doesn't exist 3698 * Also possible, but rare: 3699 * -EAGAIN: incomplete, restart syscall 3700 * -ENOMEM: damn 3701 * -ENODEV: Internal IRQ fail 3702 * -E?: The add request failed 3703 * 3704 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3705 * non-zero timeout parameter the wait ioctl will wait for the given number of 3706 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3707 * without holding struct_mutex the object may become re-busied before this 3708 * function completes. A similar but shorter race condition exists in the busy 3709 * ioctl. 3710 */ 3711 int 3712 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3713 { 3714 struct drm_i915_gem_wait *args = data; 3715 struct drm_i915_gem_object *obj; 3716 ktime_t start; 3717 long ret; 3718 3719 if (args->flags != 0) 3720 return -EINVAL; 3721 3722 obj = i915_gem_object_lookup(file, args->bo_handle); 3723 if (!obj) 3724 return -ENOENT; 3725 3726 start = ktime_get(); 3727 3728 ret = i915_gem_object_wait(obj, 3729 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3730 to_wait_timeout(args->timeout_ns), 3731 to_rps_client(file)); 3732 3733 if (args->timeout_ns > 0) { 3734 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3735 if (args->timeout_ns < 0) 3736 args->timeout_ns = 0; 3737 3738 /* 3739 * Apparently ktime isn't accurate enough and occasionally has a 3740 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3741 * things up to make the test happy. We allow up to 1 jiffy. 3742 * 3743 * This is a regression from the timespec->ktime conversion. 3744 */ 3745 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3746 args->timeout_ns = 0; 3747 3748 /* Asked to wait beyond the jiffie/scheduler precision? */ 3749 if (ret == -ETIME && args->timeout_ns) 3750 ret = -EAGAIN; 3751 } 3752 3753 i915_gem_object_put(obj); 3754 return ret; 3755 } 3756 3757 static long wait_for_timeline(struct i915_timeline *tl, 3758 unsigned int flags, long timeout) 3759 { 3760 struct i915_request *rq; 3761 3762 rq = i915_gem_active_get_unlocked(&tl->last_request); 3763 if (!rq) 3764 return timeout; 3765 3766 /* 3767 * "Race-to-idle". 3768 * 3769 * Switching to the kernel context is often used as a synchronous 3770 * step prior to idling, e.g. in suspend for flushing all 3771 * current operations to memory before sleeping. These we 3772 * want to complete as quickly as possible to avoid prolonged 3773 * stalls, so allow the gpu to boost to maximum clocks.
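 *
 * For reference, the userspace counterpart of i915_gem_wait_ioctl() above:
 * a timeout of 0 gives a non-blocking busy check, -1 waits forever, and on
 * -ETIME the remaining budget comes back in timeout_ns. A rough sketch
 * (helper name illustrative, assuming the uapi from <drm/i915_drm.h>):
 *
 *    #include <errno.h>
 *    #include <stdint.h>
 *    #include <sys/ioctl.h>
 *    #include <drm/i915_drm.h>
 *
 *    // 1: idle, 0: still busy after the timeout, -1: error.
 *    static int bo_wait(int fd, uint32_t handle, int64_t timeout_ns)
 *    {
 *            struct drm_i915_gem_wait wait = {
 *                    .bo_handle = handle,
 *                    .timeout_ns = timeout_ns,
 *            };
 *
 *            if (ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
 *                    return 1;
 *
 *            return errno == ETIME ? 0 : -1;
 *    }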
3774 */ 3775 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3776 gen6_rps_boost(rq, NULL); 3777 3778 timeout = i915_request_wait(rq, flags, timeout); 3779 i915_request_put(rq); 3780 3781 return timeout; 3782 } 3783 3784 static int wait_for_engines(struct drm_i915_private *i915) 3785 { 3786 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3787 dev_err(i915->drm.dev, 3788 "Failed to idle engines, declaring wedged!\n"); 3789 GEM_TRACE_DUMP(); 3790 i915_gem_set_wedged(i915); 3791 return -EIO; 3792 } 3793 3794 return 0; 3795 } 3796 3797 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3798 unsigned int flags, long timeout) 3799 { 3800 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3801 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3802 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3803 3804 /* If the device is asleep, we have no requests outstanding */ 3805 if (!READ_ONCE(i915->gt.awake)) 3806 return 0; 3807 3808 if (flags & I915_WAIT_LOCKED) { 3809 struct i915_timeline *tl; 3810 int err; 3811 3812 lockdep_assert_held(&i915->drm.struct_mutex); 3813 3814 list_for_each_entry(tl, &i915->gt.timelines, link) { 3815 timeout = wait_for_timeline(tl, flags, timeout); 3816 if (timeout < 0) 3817 return timeout; 3818 } 3819 3820 err = wait_for_engines(i915); 3821 if (err) 3822 return err; 3823 3824 i915_retire_requests(i915); 3825 GEM_BUG_ON(i915->gt.active_requests); 3826 } else { 3827 struct intel_engine_cs *engine; 3828 enum intel_engine_id id; 3829 3830 for_each_engine(engine, i915, id) { 3831 struct i915_timeline *tl = &engine->timeline; 3832 3833 timeout = wait_for_timeline(tl, flags, timeout); 3834 if (timeout < 0) 3835 return timeout; 3836 } 3837 } 3838 3839 return 0; 3840 } 3841 3842 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3843 { 3844 /* 3845 * We manually flush the CPU domain so that we can override and 3846 * force the flush for the display, and perform it asyncrhonously. 3847 */ 3848 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3849 if (obj->cache_dirty) 3850 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3851 obj->write_domain = 0; 3852 } 3853 3854 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3855 { 3856 if (!READ_ONCE(obj->pin_global)) 3857 return; 3858 3859 mutex_lock(&obj->base.dev->struct_mutex); 3860 __i915_gem_object_flush_for_display(obj); 3861 mutex_unlock(&obj->base.dev->struct_mutex); 3862 } 3863 3864 /** 3865 * Moves a single object to the WC read, and possibly write domain. 3866 * @obj: object to act on 3867 * @write: ask for write access or read only 3868 * 3869 * This function returns when the move is complete, including waiting on 3870 * flushes to occur. 3871 */ 3872 int 3873 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3874 { 3875 int ret; 3876 3877 lockdep_assert_held(&obj->base.dev->struct_mutex); 3878 3879 ret = i915_gem_object_wait(obj, 3880 I915_WAIT_INTERRUPTIBLE | 3881 I915_WAIT_LOCKED | 3882 (write ? I915_WAIT_ALL : 0), 3883 MAX_SCHEDULE_TIMEOUT, 3884 NULL); 3885 if (ret) 3886 return ret; 3887 3888 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3889 return 0; 3890 3891 /* Flush and acquire obj->pages so that we are coherent through 3892 * direct access in memory with previous cached writes through 3893 * shmemfs and that our cache domain tracking remains valid. 
3894 * For example, if the obj->filp was moved to swap without us 3895 * being notified and releasing the pages, we would mistakenly 3896 * continue to assume that the obj remained out of the CPU cached 3897 * domain. 3898 */ 3899 ret = i915_gem_object_pin_pages(obj); 3900 if (ret) 3901 return ret; 3902 3903 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3904 3905 /* Serialise direct access to this object with the barriers for 3906 * coherent writes from the GPU, by effectively invalidating the 3907 * WC domain upon first access. 3908 */ 3909 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3910 mb(); 3911 3912 /* It should now be out of any other write domains, and we can update 3913 * the domain values for our changes. 3914 */ 3915 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3916 obj->read_domains |= I915_GEM_DOMAIN_WC; 3917 if (write) { 3918 obj->read_domains = I915_GEM_DOMAIN_WC; 3919 obj->write_domain = I915_GEM_DOMAIN_WC; 3920 obj->mm.dirty = true; 3921 } 3922 3923 i915_gem_object_unpin_pages(obj); 3924 return 0; 3925 } 3926 3927 /** 3928 * Moves a single object to the GTT read, and possibly write domain. 3929 * @obj: object to act on 3930 * @write: ask for write access or read only 3931 * 3932 * This function returns when the move is complete, including waiting on 3933 * flushes to occur. 3934 */ 3935 int 3936 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3937 { 3938 int ret; 3939 3940 lockdep_assert_held(&obj->base.dev->struct_mutex); 3941 3942 ret = i915_gem_object_wait(obj, 3943 I915_WAIT_INTERRUPTIBLE | 3944 I915_WAIT_LOCKED | 3945 (write ? I915_WAIT_ALL : 0), 3946 MAX_SCHEDULE_TIMEOUT, 3947 NULL); 3948 if (ret) 3949 return ret; 3950 3951 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3952 return 0; 3953 3954 /* Flush and acquire obj->pages so that we are coherent through 3955 * direct access in memory with previous cached writes through 3956 * shmemfs and that our cache domain tracking remains valid. 3957 * For example, if the obj->filp was moved to swap without us 3958 * being notified and releasing the pages, we would mistakenly 3959 * continue to assume that the obj remained out of the CPU cached 3960 * domain. 3961 */ 3962 ret = i915_gem_object_pin_pages(obj); 3963 if (ret) 3964 return ret; 3965 3966 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3967 3968 /* Serialise direct access to this object with the barriers for 3969 * coherent writes from the GPU, by effectively invalidating the 3970 * GTT domain upon first access. 3971 */ 3972 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3973 mb(); 3974 3975 /* It should now be out of any other write domains, and we can update 3976 * the domain values for our changes. 3977 */ 3978 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3979 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3980 if (write) { 3981 obj->read_domains = I915_GEM_DOMAIN_GTT; 3982 obj->write_domain = I915_GEM_DOMAIN_GTT; 3983 obj->mm.dirty = true; 3984 } 3985 3986 i915_gem_object_unpin_pages(obj); 3987 return 0; 3988 } 3989 3990 /** 3991 * Changes the cache-level of an object across all VMA. 3992 * @obj: object to act on 3993 * @cache_level: new cache level to set for the object 3994 * 3995 * After this function returns, the object will be in the new cache-level 3996 * across all GTT and the contents of the backing storage will be coherent, 3997 * with respect to the new cache-level. 
In order to keep the backing storage 3998 * coherent for all users, we only allow a single cache level to be set 3999 * globally on the object and prevent it from being changed whilst the 4000 * hardware is reading from the object. That is if the object is currently 4001 * on the scanout it will be set to uncached (or equivalent display 4002 * cache coherency) and all non-MOCS GPU access will also be uncached so 4003 * that all direct access to the scanout remains coherent. 4004 */ 4005 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4006 enum i915_cache_level cache_level) 4007 { 4008 struct i915_vma *vma; 4009 int ret; 4010 4011 lockdep_assert_held(&obj->base.dev->struct_mutex); 4012 4013 if (obj->cache_level == cache_level) 4014 return 0; 4015 4016 /* Inspect the list of currently bound VMA and unbind any that would 4017 * be invalid given the new cache-level. This is principally to 4018 * catch the issue of the CS prefetch crossing page boundaries and 4019 * reading an invalid PTE on older architectures. 4020 */ 4021 restart: 4022 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4023 if (!drm_mm_node_allocated(&vma->node)) 4024 continue; 4025 4026 if (i915_vma_is_pinned(vma)) { 4027 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4028 return -EBUSY; 4029 } 4030 4031 if (!i915_vma_is_closed(vma) && 4032 i915_gem_valid_gtt_space(vma, cache_level)) 4033 continue; 4034 4035 ret = i915_vma_unbind(vma); 4036 if (ret) 4037 return ret; 4038 4039 /* As unbinding may affect other elements in the 4040 * obj->vma_list (due to side-effects from retiring 4041 * an active vma), play safe and restart the iterator. 4042 */ 4043 goto restart; 4044 } 4045 4046 /* We can reuse the existing drm_mm nodes but need to change the 4047 * cache-level on the PTE. We could simply unbind them all and 4048 * rebind with the correct cache-level on next use. However since 4049 * we already have a valid slot, dma mapping, pages etc, we may as 4050 * rewrite the PTE in the belief that doing so tramples upon less 4051 * state and so involves less work. 4052 */ 4053 if (obj->bind_count) { 4054 /* Before we change the PTE, the GPU must not be accessing it. 4055 * If we wait upon the object, we know that all the bound 4056 * VMA are no longer active. 4057 */ 4058 ret = i915_gem_object_wait(obj, 4059 I915_WAIT_INTERRUPTIBLE | 4060 I915_WAIT_LOCKED | 4061 I915_WAIT_ALL, 4062 MAX_SCHEDULE_TIMEOUT, 4063 NULL); 4064 if (ret) 4065 return ret; 4066 4067 if (!HAS_LLC(to_i915(obj->base.dev)) && 4068 cache_level != I915_CACHE_NONE) { 4069 /* Access to snoopable pages through the GTT is 4070 * incoherent and on some machines causes a hard 4071 * lockup. Relinquish the CPU mmaping to force 4072 * userspace to refault in the pages and we can 4073 * then double check if the GTT mapping is still 4074 * valid for that pointer access. 4075 */ 4076 i915_gem_release_mmap(obj); 4077 4078 /* As we no longer need a fence for GTT access, 4079 * we can relinquish it now (and so prevent having 4080 * to steal a fence from someone else on the next 4081 * fence request). Note GPU activity would have 4082 * dropped the fence as all snoopable access is 4083 * supposed to be linear. 4084 */ 4085 for_each_ggtt_vma(vma, obj) { 4086 ret = i915_vma_put_fence(vma); 4087 if (ret) 4088 return ret; 4089 } 4090 } else { 4091 /* We either have incoherent backing store and 4092 * so no GTT access or the architecture is fully 4093 * coherent. 
In such cases, existing GTT mmaps 4094 * ignore the cache bit in the PTE and we can 4095 * rewrite it without confusing the GPU or having 4096 * to force userspace to fault back in its mmaps. 4097 */ 4098 } 4099 4100 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4101 if (!drm_mm_node_allocated(&vma->node)) 4102 continue; 4103 4104 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4105 if (ret) 4106 return ret; 4107 } 4108 } 4109 4110 list_for_each_entry(vma, &obj->vma_list, obj_link) 4111 vma->node.color = cache_level; 4112 i915_gem_object_set_cache_coherency(obj, cache_level); 4113 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4114 4115 return 0; 4116 } 4117 4118 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4119 struct drm_file *file) 4120 { 4121 struct drm_i915_gem_caching *args = data; 4122 struct drm_i915_gem_object *obj; 4123 int err = 0; 4124 4125 rcu_read_lock(); 4126 obj = i915_gem_object_lookup_rcu(file, args->handle); 4127 if (!obj) { 4128 err = -ENOENT; 4129 goto out; 4130 } 4131 4132 switch (obj->cache_level) { 4133 case I915_CACHE_LLC: 4134 case I915_CACHE_L3_LLC: 4135 args->caching = I915_CACHING_CACHED; 4136 break; 4137 4138 case I915_CACHE_WT: 4139 args->caching = I915_CACHING_DISPLAY; 4140 break; 4141 4142 default: 4143 args->caching = I915_CACHING_NONE; 4144 break; 4145 } 4146 out: 4147 rcu_read_unlock(); 4148 return err; 4149 } 4150 4151 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4152 struct drm_file *file) 4153 { 4154 struct drm_i915_private *i915 = to_i915(dev); 4155 struct drm_i915_gem_caching *args = data; 4156 struct drm_i915_gem_object *obj; 4157 enum i915_cache_level level; 4158 int ret = 0; 4159 4160 switch (args->caching) { 4161 case I915_CACHING_NONE: 4162 level = I915_CACHE_NONE; 4163 break; 4164 case I915_CACHING_CACHED: 4165 /* 4166 * Due to a HW issue on BXT A stepping, GPU stores via a 4167 * snooped mapping may leave stale data in a corresponding CPU 4168 * cacheline, whereas normally such cachelines would get 4169 * invalidated. 4170 */ 4171 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4172 return -ENODEV; 4173 4174 level = I915_CACHE_LLC; 4175 break; 4176 case I915_CACHING_DISPLAY: 4177 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4178 break; 4179 default: 4180 return -EINVAL; 4181 } 4182 4183 obj = i915_gem_object_lookup(file, args->handle); 4184 if (!obj) 4185 return -ENOENT; 4186 4187 /* 4188 * The caching mode of proxy object is handled by its generator, and 4189 * not allowed to be changed by userspace. 4190 */ 4191 if (i915_gem_object_is_proxy(obj)) { 4192 ret = -ENXIO; 4193 goto out; 4194 } 4195 4196 if (obj->cache_level == level) 4197 goto out; 4198 4199 ret = i915_gem_object_wait(obj, 4200 I915_WAIT_INTERRUPTIBLE, 4201 MAX_SCHEDULE_TIMEOUT, 4202 to_rps_client(file)); 4203 if (ret) 4204 goto out; 4205 4206 ret = i915_mutex_lock_interruptible(dev); 4207 if (ret) 4208 goto out; 4209 4210 ret = i915_gem_object_set_cache_level(obj, level); 4211 mutex_unlock(&dev->struct_mutex); 4212 4213 out: 4214 i915_gem_object_put(obj); 4215 return ret; 4216 } 4217 4218 /* 4219 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 4220 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4221 * (for pageflips). We only flush the caches while preparing the buffer for 4222 * display, the callers are responsible for frontbuffer flush. 
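 *
 * For illustration, a caller in the modeset path pairs the pin with an
 * unpin once the framebuffer is no longer being scanned out (obj, view,
 * alignment and flags below are placeholders, not a real call site):
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... scanout via vma ...
 *	i915_gem_object_unpin_from_display_plane(vma);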
4223 */ 4224 struct i915_vma * 4225 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4226 u32 alignment, 4227 const struct i915_ggtt_view *view, 4228 unsigned int flags) 4229 { 4230 struct i915_vma *vma; 4231 int ret; 4232 4233 lockdep_assert_held(&obj->base.dev->struct_mutex); 4234 4235 /* Mark the global pin early so that we account for the 4236 * display coherency whilst setting up the cache domains. 4237 */ 4238 obj->pin_global++; 4239 4240 /* The display engine is not coherent with the LLC cache on gen6. As 4241 * a result, we make sure that the pinning that is about to occur is 4242 * done with uncached PTEs. This is lowest common denominator for all 4243 * chipsets. 4244 * 4245 * However for gen6+, we could do better by using the GFDT bit instead 4246 * of uncaching, which would allow us to flush all the LLC-cached data 4247 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4248 */ 4249 ret = i915_gem_object_set_cache_level(obj, 4250 HAS_WT(to_i915(obj->base.dev)) ? 4251 I915_CACHE_WT : I915_CACHE_NONE); 4252 if (ret) { 4253 vma = ERR_PTR(ret); 4254 goto err_unpin_global; 4255 } 4256 4257 /* As the user may map the buffer once pinned in the display plane 4258 * (e.g. libkms for the bootup splash), we have to ensure that we 4259 * always use map_and_fenceable for all scanout buffers. However, 4260 * it may simply be too big to fit into mappable, in which case 4261 * put it anyway and hope that userspace can cope (but always first 4262 * try to preserve the existing ABI). 4263 */ 4264 vma = ERR_PTR(-ENOSPC); 4265 if ((flags & PIN_MAPPABLE) == 0 && 4266 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4267 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4268 flags | 4269 PIN_MAPPABLE | 4270 PIN_NONBLOCK); 4271 if (IS_ERR(vma)) 4272 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4273 if (IS_ERR(vma)) 4274 goto err_unpin_global; 4275 4276 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4277 4278 __i915_gem_object_flush_for_display(obj); 4279 4280 /* It should now be out of any other write domains, and we can update 4281 * the domain values for our changes. 4282 */ 4283 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4284 4285 return vma; 4286 4287 err_unpin_global: 4288 obj->pin_global--; 4289 return vma; 4290 } 4291 4292 void 4293 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4294 { 4295 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4296 4297 if (WARN_ON(vma->obj->pin_global == 0)) 4298 return; 4299 4300 if (--vma->obj->pin_global == 0) 4301 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4302 4303 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4304 i915_gem_object_bump_inactive_ggtt(vma->obj); 4305 4306 i915_vma_unpin(vma); 4307 } 4308 4309 /** 4310 * Moves a single object to the CPU read, and possibly write domain. 4311 * @obj: object to act on 4312 * @write: requesting write or read-only access 4313 * 4314 * This function returns when the move is complete, including waiting on 4315 * flushes to occur. 4316 */ 4317 int 4318 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4319 { 4320 int ret; 4321 4322 lockdep_assert_held(&obj->base.dev->struct_mutex); 4323 4324 ret = i915_gem_object_wait(obj, 4325 I915_WAIT_INTERRUPTIBLE | 4326 I915_WAIT_LOCKED | 4327 (write ? 
I915_WAIT_ALL : 0), 4328 MAX_SCHEDULE_TIMEOUT, 4329 NULL); 4330 if (ret) 4331 return ret; 4332 4333 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4334 4335 /* Flush the CPU cache if it's still invalid. */ 4336 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4337 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4338 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4339 } 4340 4341 /* It should now be out of any other write domains, and we can update 4342 * the domain values for our changes. 4343 */ 4344 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4345 4346 /* If we're writing through the CPU, then the GPU read domains will 4347 * need to be invalidated at next use. 4348 */ 4349 if (write) 4350 __start_cpu_write(obj); 4351 4352 return 0; 4353 } 4354 4355 /* Throttle our rendering by waiting until the ring has completed our requests 4356 * emitted over 20 msec ago. 4357 * 4358 * Note that if we were to use the current jiffies each time around the loop, 4359 * we wouldn't escape the function with any frames outstanding if the time to 4360 * render a frame was over 20ms. 4361 * 4362 * This should get us reasonable parallelism between CPU and GPU but also 4363 * relatively low latency when blocking on a particular request to finish. 4364 */ 4365 static int 4366 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4367 { 4368 struct drm_i915_private *dev_priv = to_i915(dev); 4369 struct drm_i915_file_private *file_priv = file->driver_priv; 4370 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4371 struct i915_request *request, *target = NULL; 4372 long ret; 4373 4374 /* ABI: return -EIO if already wedged */ 4375 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4376 return -EIO; 4377 4378 spin_lock(&file_priv->mm.lock); 4379 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4380 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4381 break; 4382 4383 if (target) { 4384 list_del(&target->client_link); 4385 target->file_priv = NULL; 4386 } 4387 4388 target = request; 4389 } 4390 if (target) 4391 i915_request_get(target); 4392 spin_unlock(&file_priv->mm.lock); 4393 4394 if (target == NULL) 4395 return 0; 4396 4397 ret = i915_request_wait(target, 4398 I915_WAIT_INTERRUPTIBLE, 4399 MAX_SCHEDULE_TIMEOUT); 4400 i915_request_put(target); 4401 4402 return ret < 0 ? ret : 0; 4403 } 4404 4405 struct i915_vma * 4406 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4407 const struct i915_ggtt_view *view, 4408 u64 size, 4409 u64 alignment, 4410 u64 flags) 4411 { 4412 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4413 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4414 struct i915_vma *vma; 4415 int ret; 4416 4417 lockdep_assert_held(&obj->base.dev->struct_mutex); 4418 4419 if (flags & PIN_MAPPABLE && 4420 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4421 /* If the required space is larger than the available 4422 * aperture, we will not able to find a slot for the 4423 * object and unbinding the object now will be in 4424 * vain. Worse, doing so may cause us to ping-pong 4425 * the object in and out of the Global GTT and 4426 * waste a lot of cycles under the mutex. 4427 */ 4428 if (obj->base.size > dev_priv->ggtt.mappable_end) 4429 return ERR_PTR(-E2BIG); 4430 4431 /* If NONBLOCK is set the caller is optimistically 4432 * trying to cache the full object within the mappable 4433 * aperture, and *must* have a fallback in place for 4434 * situations where we cannot bind the object. 
We 4435 * can be a little more lax here and use the fallback 4436 * more often to avoid costly migrations of ourselves 4437 * and other objects within the aperture. 4438 * 4439 * Half-the-aperture is used as a simple heuristic. 4440 * More interesting would to do search for a free 4441 * block prior to making the commitment to unbind. 4442 * That caters for the self-harm case, and with a 4443 * little more heuristics (e.g. NOFAULT, NOEVICT) 4444 * we could try to minimise harm to others. 4445 */ 4446 if (flags & PIN_NONBLOCK && 4447 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4448 return ERR_PTR(-ENOSPC); 4449 } 4450 4451 vma = i915_vma_instance(obj, vm, view); 4452 if (unlikely(IS_ERR(vma))) 4453 return vma; 4454 4455 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4456 if (flags & PIN_NONBLOCK) { 4457 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4458 return ERR_PTR(-ENOSPC); 4459 4460 if (flags & PIN_MAPPABLE && 4461 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4462 return ERR_PTR(-ENOSPC); 4463 } 4464 4465 WARN(i915_vma_is_pinned(vma), 4466 "bo is already pinned in ggtt with incorrect alignment:" 4467 " offset=%08x, req.alignment=%llx," 4468 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4469 i915_ggtt_offset(vma), alignment, 4470 !!(flags & PIN_MAPPABLE), 4471 i915_vma_is_map_and_fenceable(vma)); 4472 ret = i915_vma_unbind(vma); 4473 if (ret) 4474 return ERR_PTR(ret); 4475 } 4476 4477 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4478 if (ret) 4479 return ERR_PTR(ret); 4480 4481 return vma; 4482 } 4483 4484 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4485 { 4486 /* Note that we could alias engines in the execbuf API, but 4487 * that would be very unwise as it prevents userspace from 4488 * fine control over engine selection. Ahem. 4489 * 4490 * This should be something like EXEC_MAX_ENGINE instead of 4491 * I915_NUM_ENGINES. 4492 */ 4493 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4494 return 0x10000 << id; 4495 } 4496 4497 static __always_inline unsigned int __busy_write_id(unsigned int id) 4498 { 4499 /* The uABI guarantees an active writer is also amongst the read 4500 * engines. This would be true if we accessed the activity tracking 4501 * under the lock, but as we perform the lookup of the object and 4502 * its activity locklessly we can not guarantee that the last_write 4503 * being active implies that we have set the same engine flag from 4504 * last_read - hence we always set both read and write busy for 4505 * last_write. 4506 */ 4507 return id | __busy_read_flag(id); 4508 } 4509 4510 static __always_inline unsigned int 4511 __busy_set_if_active(const struct dma_fence *fence, 4512 unsigned int (*flag)(unsigned int id)) 4513 { 4514 struct i915_request *rq; 4515 4516 /* We have to check the current hw status of the fence as the uABI 4517 * guarantees forward progress. We could rely on the idle worker 4518 * to eventually flush us, but to minimise latency just ask the 4519 * hardware. 4520 * 4521 * Note we only report on the status of native fences. 
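 * (A foreign fence attached through dma-buf is therefore treated as idle
 * here; see the comment in i915_gem_busy_ioctl() below.)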
4522 */ 4523 if (!dma_fence_is_i915(fence)) 4524 return 0; 4525 4526 /* opencode to_request() in order to avoid const warnings */ 4527 rq = container_of(fence, struct i915_request, fence); 4528 if (i915_request_completed(rq)) 4529 return 0; 4530 4531 return flag(rq->engine->uabi_id); 4532 } 4533 4534 static __always_inline unsigned int 4535 busy_check_reader(const struct dma_fence *fence) 4536 { 4537 return __busy_set_if_active(fence, __busy_read_flag); 4538 } 4539 4540 static __always_inline unsigned int 4541 busy_check_writer(const struct dma_fence *fence) 4542 { 4543 if (!fence) 4544 return 0; 4545 4546 return __busy_set_if_active(fence, __busy_write_id); 4547 } 4548 4549 int 4550 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4551 struct drm_file *file) 4552 { 4553 struct drm_i915_gem_busy *args = data; 4554 struct drm_i915_gem_object *obj; 4555 struct reservation_object_list *list; 4556 unsigned int seq; 4557 int err; 4558 4559 err = -ENOENT; 4560 rcu_read_lock(); 4561 obj = i915_gem_object_lookup_rcu(file, args->handle); 4562 if (!obj) 4563 goto out; 4564 4565 /* A discrepancy here is that we do not report the status of 4566 * non-i915 fences, i.e. even though we may report the object as idle, 4567 * a call to set-domain may still stall waiting for foreign rendering. 4568 * This also means that wait-ioctl may report an object as busy, 4569 * where busy-ioctl considers it idle. 4570 * 4571 * We trade the ability to warn of foreign fences to report on which 4572 * i915 engines are active for the object. 4573 * 4574 * Alternatively, we can trade that extra information on read/write 4575 * activity with 4576 * args->busy = 4577 * !reservation_object_test_signaled_rcu(obj->resv, true); 4578 * to report the overall busyness. This is what the wait-ioctl does. 
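 *
 * As a rough userspace-side sketch (fd and handle are placeholders), the
 * result is consumed as:
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 *	if (busy.busy) {
 *		// high 16 bits: bitmask of engines reading from the object
 *		// low 16 bits: the engine writing to it, if any
 *	}
 *
 * matching the encoding of __busy_read_flag() and __busy_write_id() above.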
4579 * 4580 */ 4581 retry: 4582 seq = raw_read_seqcount(&obj->resv->seq); 4583 4584 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4585 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4586 4587 /* Translate shared fences to READ set of engines */ 4588 list = rcu_dereference(obj->resv->fence); 4589 if (list) { 4590 unsigned int shared_count = list->shared_count, i; 4591 4592 for (i = 0; i < shared_count; ++i) { 4593 struct dma_fence *fence = 4594 rcu_dereference(list->shared[i]); 4595 4596 args->busy |= busy_check_reader(fence); 4597 } 4598 } 4599 4600 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4601 goto retry; 4602 4603 err = 0; 4604 out: 4605 rcu_read_unlock(); 4606 return err; 4607 } 4608 4609 int 4610 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4611 struct drm_file *file_priv) 4612 { 4613 return i915_gem_ring_throttle(dev, file_priv); 4614 } 4615 4616 int 4617 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4618 struct drm_file *file_priv) 4619 { 4620 struct drm_i915_private *dev_priv = to_i915(dev); 4621 struct drm_i915_gem_madvise *args = data; 4622 struct drm_i915_gem_object *obj; 4623 int err; 4624 4625 switch (args->madv) { 4626 case I915_MADV_DONTNEED: 4627 case I915_MADV_WILLNEED: 4628 break; 4629 default: 4630 return -EINVAL; 4631 } 4632 4633 obj = i915_gem_object_lookup(file_priv, args->handle); 4634 if (!obj) 4635 return -ENOENT; 4636 4637 err = mutex_lock_interruptible(&obj->mm.lock); 4638 if (err) 4639 goto out; 4640 4641 if (i915_gem_object_has_pages(obj) && 4642 i915_gem_object_is_tiled(obj) && 4643 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4644 if (obj->mm.madv == I915_MADV_WILLNEED) { 4645 GEM_BUG_ON(!obj->mm.quirked); 4646 __i915_gem_object_unpin_pages(obj); 4647 obj->mm.quirked = false; 4648 } 4649 if (args->madv == I915_MADV_WILLNEED) { 4650 GEM_BUG_ON(obj->mm.quirked); 4651 __i915_gem_object_pin_pages(obj); 4652 obj->mm.quirked = true; 4653 } 4654 } 4655 4656 if (obj->mm.madv != __I915_MADV_PURGED) 4657 obj->mm.madv = args->madv; 4658 4659 /* if the object is no longer attached, discard its backing storage */ 4660 if (obj->mm.madv == I915_MADV_DONTNEED && 4661 !i915_gem_object_has_pages(obj)) 4662 i915_gem_object_truncate(obj); 4663 4664 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4665 mutex_unlock(&obj->mm.lock); 4666 4667 out: 4668 i915_gem_object_put(obj); 4669 return err; 4670 } 4671 4672 static void 4673 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4674 { 4675 struct drm_i915_gem_object *obj = 4676 container_of(active, typeof(*obj), frontbuffer_write); 4677 4678 intel_fb_obj_flush(obj, ORIGIN_CS); 4679 } 4680 4681 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4682 const struct drm_i915_gem_object_ops *ops) 4683 { 4684 mutex_init(&obj->mm.lock); 4685 4686 INIT_LIST_HEAD(&obj->vma_list); 4687 INIT_LIST_HEAD(&obj->lut_list); 4688 INIT_LIST_HEAD(&obj->batch_pool_link); 4689 4690 obj->ops = ops; 4691 4692 reservation_object_init(&obj->__builtin_resv); 4693 obj->resv = &obj->__builtin_resv; 4694 4695 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4696 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4697 4698 obj->mm.madv = I915_MADV_WILLNEED; 4699 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4700 mutex_init(&obj->mm.get_page.lock); 4701 4702 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4703 } 4704 4705 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4706 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4707 I915_GEM_OBJECT_IS_SHRINKABLE, 4708 4709 .get_pages = i915_gem_object_get_pages_gtt, 4710 .put_pages = i915_gem_object_put_pages_gtt, 4711 4712 .pwrite = i915_gem_object_pwrite_gtt, 4713 }; 4714 4715 static int i915_gem_object_create_shmem(struct drm_device *dev, 4716 struct drm_gem_object *obj, 4717 size_t size) 4718 { 4719 struct drm_i915_private *i915 = to_i915(dev); 4720 unsigned long flags = VM_NORESERVE; 4721 struct file *filp; 4722 4723 drm_gem_private_object_init(dev, obj, size); 4724 4725 if (i915->mm.gemfs) 4726 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4727 flags); 4728 else 4729 filp = shmem_file_setup("i915", size, flags); 4730 4731 if (IS_ERR(filp)) 4732 return PTR_ERR(filp); 4733 4734 obj->filp = filp; 4735 4736 return 0; 4737 } 4738 4739 struct drm_i915_gem_object * 4740 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4741 { 4742 struct drm_i915_gem_object *obj; 4743 struct address_space *mapping; 4744 unsigned int cache_level; 4745 gfp_t mask; 4746 int ret; 4747 4748 /* There is a prevalence of the assumption that we fit the object's 4749 * page count inside a 32bit _signed_ variable. Let's document this and 4750 * catch if we ever need to fix it. In the meantime, if you do spot 4751 * such a local variable, please consider fixing! 4752 */ 4753 if (size >> PAGE_SHIFT > INT_MAX) 4754 return ERR_PTR(-E2BIG); 4755 4756 if (overflows_type(size, obj->base.size)) 4757 return ERR_PTR(-E2BIG); 4758 4759 obj = i915_gem_object_alloc(dev_priv); 4760 if (obj == NULL) 4761 return ERR_PTR(-ENOMEM); 4762 4763 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4764 if (ret) 4765 goto fail; 4766 4767 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4768 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4769 /* 965gm cannot relocate objects above 4GiB. */ 4770 mask &= ~__GFP_HIGHMEM; 4771 mask |= __GFP_DMA32; 4772 } 4773 4774 mapping = obj->base.filp->f_mapping; 4775 mapping_set_gfp_mask(mapping, mask); 4776 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4777 4778 i915_gem_object_init(obj, &i915_gem_object_ops); 4779 4780 obj->write_domain = I915_GEM_DOMAIN_CPU; 4781 obj->read_domains = I915_GEM_DOMAIN_CPU; 4782 4783 if (HAS_LLC(dev_priv)) 4784 /* On some devices, we can have the GPU use the LLC (the CPU 4785 * cache) for about a 10% performance improvement 4786 * compared to uncached. Graphics requests other than 4787 * display scanout are coherent with the CPU in 4788 * accessing this cache. This means in this mode we 4789 * don't need to clflush on the CPU side, and on the 4790 * GPU side we only need to flush internal caches to 4791 * get data visible to the CPU. 4792 * 4793 * However, we maintain the display planes as UC, and so 4794 * need to rebind when first used as such. 4795 */ 4796 cache_level = I915_CACHE_LLC; 4797 else 4798 cache_level = I915_CACHE_NONE; 4799 4800 i915_gem_object_set_cache_coherency(obj, cache_level); 4801 4802 trace_i915_gem_object_create(obj); 4803 4804 return obj; 4805 4806 fail: 4807 i915_gem_object_free(obj); 4808 return ERR_PTR(ret); 4809 } 4810 4811 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4812 { 4813 /* If we are the last user of the backing storage (be it shmemfs 4814 * pages or stolen etc), we know that the pages are going to be 4815 * immediately released. In this case, we can then skip copying 4816 * back the contents from the GPU. 
4817 */ 4818 4819 if (obj->mm.madv != I915_MADV_WILLNEED) 4820 return false; 4821 4822 if (obj->base.filp == NULL) 4823 return true; 4824 4825 /* At first glance, this looks racy, but then again so would be 4826 * userspace racing mmap against close. However, the first external 4827 * reference to the filp can only be obtained through the 4828 * i915_gem_mmap_ioctl() which safeguards us against the user 4829 * acquiring such a reference whilst we are in the middle of 4830 * freeing the object. 4831 */ 4832 return atomic_long_read(&obj->base.filp->f_count) == 1; 4833 } 4834 4835 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4836 struct llist_node *freed) 4837 { 4838 struct drm_i915_gem_object *obj, *on; 4839 4840 intel_runtime_pm_get(i915); 4841 llist_for_each_entry_safe(obj, on, freed, freed) { 4842 struct i915_vma *vma, *vn; 4843 4844 trace_i915_gem_object_destroy(obj); 4845 4846 mutex_lock(&i915->drm.struct_mutex); 4847 4848 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4849 list_for_each_entry_safe(vma, vn, 4850 &obj->vma_list, obj_link) { 4851 GEM_BUG_ON(i915_vma_is_active(vma)); 4852 vma->flags &= ~I915_VMA_PIN_MASK; 4853 i915_vma_destroy(vma); 4854 } 4855 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4856 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4857 4858 /* This serializes freeing with the shrinker. Since the free 4859 * is delayed, first by RCU then by the workqueue, we want the 4860 * shrinker to be able to free pages of unreferenced objects, 4861 * or else we may oom whilst there are plenty of deferred 4862 * freed objects. 4863 */ 4864 if (i915_gem_object_has_pages(obj)) { 4865 spin_lock(&i915->mm.obj_lock); 4866 list_del_init(&obj->mm.link); 4867 spin_unlock(&i915->mm.obj_lock); 4868 } 4869 4870 mutex_unlock(&i915->drm.struct_mutex); 4871 4872 GEM_BUG_ON(obj->bind_count); 4873 GEM_BUG_ON(obj->userfault_count); 4874 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4875 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4876 4877 if (obj->ops->release) 4878 obj->ops->release(obj); 4879 4880 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4881 atomic_set(&obj->mm.pages_pin_count, 0); 4882 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4883 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4884 4885 if (obj->base.import_attach) 4886 drm_prime_gem_destroy(&obj->base, NULL); 4887 4888 reservation_object_fini(&obj->__builtin_resv); 4889 drm_gem_object_release(&obj->base); 4890 i915_gem_info_remove_obj(i915, obj->base.size); 4891 4892 kfree(obj->bit_17); 4893 i915_gem_object_free(obj); 4894 4895 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4896 atomic_dec(&i915->mm.free_count); 4897 4898 if (on) 4899 cond_resched(); 4900 } 4901 intel_runtime_pm_put(i915); 4902 } 4903 4904 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4905 { 4906 struct llist_node *freed; 4907 4908 /* Free the oldest, most stale object to keep the free_list short */ 4909 freed = NULL; 4910 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4911 /* Only one consumer of llist_del_first() allowed */ 4912 spin_lock(&i915->mm.free_lock); 4913 freed = llist_del_first(&i915->mm.free_list); 4914 spin_unlock(&i915->mm.free_lock); 4915 } 4916 if (unlikely(freed)) { 4917 freed->next = NULL; 4918 __i915_gem_free_objects(i915, freed); 4919 } 4920 } 4921 4922 static void __i915_gem_free_work(struct work_struct *work) 4923 { 4924 struct drm_i915_private *i915 = 4925 container_of(work, struct drm_i915_private, mm.free_work); 4926 struct llist_node *freed; 4927 4928 /* 4929 * All 
file-owned VMA should have been released by this point through 4930 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4931 * However, the object may also be bound into the global GTT (e.g. 4932 * older GPUs without per-process support, or for direct access through 4933 * the GTT either for the user or for scanout). Those VMA still need to 4934 * unbound now. 4935 */ 4936 4937 spin_lock(&i915->mm.free_lock); 4938 while ((freed = llist_del_all(&i915->mm.free_list))) { 4939 spin_unlock(&i915->mm.free_lock); 4940 4941 __i915_gem_free_objects(i915, freed); 4942 if (need_resched()) 4943 return; 4944 4945 spin_lock(&i915->mm.free_lock); 4946 } 4947 spin_unlock(&i915->mm.free_lock); 4948 } 4949 4950 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4951 { 4952 struct drm_i915_gem_object *obj = 4953 container_of(head, typeof(*obj), rcu); 4954 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4955 4956 /* 4957 * Since we require blocking on struct_mutex to unbind the freed 4958 * object from the GPU before releasing resources back to the 4959 * system, we can not do that directly from the RCU callback (which may 4960 * be a softirq context), but must instead then defer that work onto a 4961 * kthread. We use the RCU callback rather than move the freed object 4962 * directly onto the work queue so that we can mix between using the 4963 * worker and performing frees directly from subsequent allocations for 4964 * crude but effective memory throttling. 4965 */ 4966 if (llist_add(&obj->freed, &i915->mm.free_list)) 4967 queue_work(i915->wq, &i915->mm.free_work); 4968 } 4969 4970 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4971 { 4972 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4973 4974 if (obj->mm.quirked) 4975 __i915_gem_object_unpin_pages(obj); 4976 4977 if (discard_backing_storage(obj)) 4978 obj->mm.madv = I915_MADV_DONTNEED; 4979 4980 /* 4981 * Before we free the object, make sure any pure RCU-only 4982 * read-side critical sections are complete, e.g. 4983 * i915_gem_busy_ioctl(). For the corresponding synchronized 4984 * lookup see i915_gem_object_lookup_rcu(). 4985 */ 4986 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4987 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4988 } 4989 4990 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4991 { 4992 lockdep_assert_held(&obj->base.dev->struct_mutex); 4993 4994 if (!i915_gem_object_has_active_reference(obj) && 4995 i915_gem_object_is_active(obj)) 4996 i915_gem_object_set_active_reference(obj); 4997 else 4998 i915_gem_object_put(obj); 4999 } 5000 5001 void i915_gem_sanitize(struct drm_i915_private *i915) 5002 { 5003 int err; 5004 5005 GEM_TRACE("\n"); 5006 5007 mutex_lock(&i915->drm.struct_mutex); 5008 5009 intel_runtime_pm_get(i915); 5010 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5011 5012 /* 5013 * As we have just resumed the machine and woken the device up from 5014 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5015 * back to defaults, recovering from whatever wedged state we left it 5016 * in and so worth trying to use the device once more. 5017 */ 5018 if (i915_terminally_wedged(&i915->gpu_error)) 5019 i915_gem_unset_wedged(i915); 5020 5021 /* 5022 * If we inherit context state from the BIOS or earlier occupants 5023 * of the GPU, the GPU may be in an inconsistent state when we 5024 * try to take over. The only way to remove the earlier state 5025 * is by resetting. 
However, resetting on earlier gen is tricky as 5026 * it may impact the display and we are uncertain about the stability 5027 * of the reset, so we only attempt it on gen5+ where a GPU reset is available. 5028 */ 5029 err = -ENODEV; 5030 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5031 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5032 if (!err) 5033 intel_engines_sanitize(i915); 5034 5035 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5036 intel_runtime_pm_put(i915); 5037 5038 i915_gem_contexts_lost(i915); 5039 mutex_unlock(&i915->drm.struct_mutex); 5040 } 5041 5042 int i915_gem_suspend(struct drm_i915_private *i915) 5043 { 5044 int ret; 5045 5046 GEM_TRACE("\n"); 5047 5048 intel_runtime_pm_get(i915); 5049 intel_suspend_gt_powersave(i915); 5050 5051 mutex_lock(&i915->drm.struct_mutex); 5052 5053 /* 5054 * We have to flush all the executing contexts to main memory so 5055 * that they can be saved in the hibernation image. To ensure the last 5056 * context image is coherent, we have to switch away from it. That 5057 * leaves the i915->kernel_context still active when 5058 * we actually suspend, and its image in memory may not match the GPU 5059 * state. Fortunately, the kernel_context is disposable and we do 5060 * not rely on its state. 5061 */ 5062 if (!i915_terminally_wedged(&i915->gpu_error)) { 5063 ret = i915_gem_switch_to_kernel_context(i915); 5064 if (ret) 5065 goto err_unlock; 5066 5067 ret = i915_gem_wait_for_idle(i915, 5068 I915_WAIT_INTERRUPTIBLE | 5069 I915_WAIT_LOCKED | 5070 I915_WAIT_FOR_IDLE_BOOST, 5071 MAX_SCHEDULE_TIMEOUT); 5072 if (ret && ret != -EIO) 5073 goto err_unlock; 5074 5075 assert_kernel_context_is_current(i915); 5076 } 5077 i915_retire_requests(i915); /* ensure we flush after wedging */ 5078 5079 mutex_unlock(&i915->drm.struct_mutex); 5080 5081 intel_uc_suspend(i915); 5082 5083 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5084 cancel_delayed_work_sync(&i915->gt.retire_work); 5085 5086 /* 5087 * As the idle_work is rearming if it detects a race, play safe and 5088 * repeat the flush until it is definitely idle. 5089 */ 5090 drain_delayed_work(&i915->gt.idle_work); 5091 5092 /* 5093 * Assert that we successfully flushed all the work and 5094 * reset the GPU back to its idle, low power state. 5095 */ 5096 WARN_ON(i915->gt.awake); 5097 if (WARN_ON(!intel_engines_are_idle(i915))) 5098 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5099 5100 intel_runtime_pm_put(i915); 5101 return 0; 5102 5103 err_unlock: 5104 mutex_unlock(&i915->drm.struct_mutex); 5105 intel_runtime_pm_put(i915); 5106 return ret; 5107 } 5108 5109 void i915_gem_suspend_late(struct drm_i915_private *i915) 5110 { 5111 struct drm_i915_gem_object *obj; 5112 struct list_head *phases[] = { 5113 &i915->mm.unbound_list, 5114 &i915->mm.bound_list, 5115 NULL 5116 }, **phase; 5117 5118 /* 5119 * Neither the BIOS, ourselves nor any other kernel 5120 * expects the system to be in execlists mode on startup, 5121 * so we need to reset the GPU back to legacy mode. And the only 5122 * known way to disable logical contexts is through a GPU reset. 5123 * 5124 * So in order to leave the system in a known default configuration, 5125 * always reset the GPU upon unload and suspend. Afterwards we then 5126 * clean up the GEM state tracking, flushing off the requests and 5127 * leaving the system in a known idle state. 5128 * 5129 * Note that it is of the utmost importance that the GPU is idle and 5130 * all stray writes are flushed *before* we dismantle the backing 5131 * storage for the pinned objects.
5132 * 5133 * However, since we are uncertain that resetting the GPU on older 5134 * machines is a good idea, we don't - just in case it leaves the 5135 * machine in an unusable condition. 5136 */ 5137 5138 mutex_lock(&i915->drm.struct_mutex); 5139 for (phase = phases; *phase; phase++) { 5140 list_for_each_entry(obj, *phase, mm.link) 5141 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5142 } 5143 mutex_unlock(&i915->drm.struct_mutex); 5144 5145 intel_uc_sanitize(i915); 5146 i915_gem_sanitize(i915); 5147 } 5148 5149 void i915_gem_resume(struct drm_i915_private *i915) 5150 { 5151 GEM_TRACE("\n"); 5152 5153 WARN_ON(i915->gt.awake); 5154 5155 mutex_lock(&i915->drm.struct_mutex); 5156 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5157 5158 i915_gem_restore_gtt_mappings(i915); 5159 i915_gem_restore_fences(i915); 5160 5161 /* 5162 * As we didn't flush the kernel context before suspend, we cannot 5163 * guarantee that the context image is complete. So let's just reset 5164 * it and start again. 5165 */ 5166 i915->gt.resume(i915); 5167 5168 if (i915_gem_init_hw(i915)) 5169 goto err_wedged; 5170 5171 intel_uc_resume(i915); 5172 5173 /* Always reload a context for powersaving. */ 5174 if (i915_gem_switch_to_kernel_context(i915)) 5175 goto err_wedged; 5176 5177 out_unlock: 5178 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5179 mutex_unlock(&i915->drm.struct_mutex); 5180 return; 5181 5182 err_wedged: 5183 if (!i915_terminally_wedged(&i915->gpu_error)) { 5184 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5185 i915_gem_set_wedged(i915); 5186 } 5187 goto out_unlock; 5188 } 5189 5190 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5191 { 5192 if (INTEL_GEN(dev_priv) < 5 || 5193 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5194 return; 5195 5196 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5197 DISP_TILE_SURFACE_SWIZZLING); 5198 5199 if (IS_GEN5(dev_priv)) 5200 return; 5201 5202 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5203 if (IS_GEN6(dev_priv)) 5204 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5205 else if (IS_GEN7(dev_priv)) 5206 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5207 else if (IS_GEN8(dev_priv)) 5208 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5209 else 5210 BUG(); 5211 } 5212 5213 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5214 { 5215 I915_WRITE(RING_CTL(base), 0); 5216 I915_WRITE(RING_HEAD(base), 0); 5217 I915_WRITE(RING_TAIL(base), 0); 5218 I915_WRITE(RING_START(base), 0); 5219 } 5220 5221 static void init_unused_rings(struct drm_i915_private *dev_priv) 5222 { 5223 if (IS_I830(dev_priv)) { 5224 init_unused_ring(dev_priv, PRB1_BASE); 5225 init_unused_ring(dev_priv, SRB0_BASE); 5226 init_unused_ring(dev_priv, SRB1_BASE); 5227 init_unused_ring(dev_priv, SRB2_BASE); 5228 init_unused_ring(dev_priv, SRB3_BASE); 5229 } else if (IS_GEN2(dev_priv)) { 5230 init_unused_ring(dev_priv, SRB0_BASE); 5231 init_unused_ring(dev_priv, SRB1_BASE); 5232 } else if (IS_GEN3(dev_priv)) { 5233 init_unused_ring(dev_priv, PRB1_BASE); 5234 init_unused_ring(dev_priv, PRB2_BASE); 5235 } 5236 } 5237 5238 static int __i915_gem_restart_engines(void *data) 5239 { 5240 struct drm_i915_private *i915 = data; 5241 struct intel_engine_cs *engine; 5242 enum intel_engine_id id; 5243 int err; 5244 5245 for_each_engine(engine, i915, id) { 5246 err = engine->init_hw(engine); 5247 if (err) { 5248 DRM_ERROR("Failed to restart %s (%d)\n", 5249 engine->name, err); 
5250 return err; 5251 } 5252 } 5253 5254 return 0; 5255 } 5256 5257 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5258 { 5259 int ret; 5260 5261 dev_priv->gt.last_init_time = ktime_get(); 5262 5263 /* Double layer security blanket, see i915_gem_init() */ 5264 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5265 5266 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5267 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5268 5269 if (IS_HASWELL(dev_priv)) 5270 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5271 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5272 5273 if (HAS_PCH_NOP(dev_priv)) { 5274 if (IS_IVYBRIDGE(dev_priv)) { 5275 u32 temp = I915_READ(GEN7_MSG_CTL); 5276 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5277 I915_WRITE(GEN7_MSG_CTL, temp); 5278 } else if (INTEL_GEN(dev_priv) >= 7) { 5279 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5280 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5281 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5282 } 5283 } 5284 5285 intel_gt_workarounds_apply(dev_priv); 5286 5287 i915_gem_init_swizzling(dev_priv); 5288 5289 /* 5290 * At least 830 can leave some of the unused rings 5291 * "active" (ie. head != tail) after resume which 5292 * will prevent c3 entry. Makes sure all unused rings 5293 * are totally idle. 5294 */ 5295 init_unused_rings(dev_priv); 5296 5297 BUG_ON(!dev_priv->kernel_context); 5298 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5299 ret = -EIO; 5300 goto out; 5301 } 5302 5303 ret = i915_ppgtt_init_hw(dev_priv); 5304 if (ret) { 5305 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5306 goto out; 5307 } 5308 5309 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5310 if (ret) { 5311 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5312 goto out; 5313 } 5314 5315 /* We can't enable contexts until all firmware is loaded */ 5316 ret = intel_uc_init_hw(dev_priv); 5317 if (ret) { 5318 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5319 goto out; 5320 } 5321 5322 intel_mocs_init_l3cc_table(dev_priv); 5323 5324 /* Only when the HW is re-initialised, can we replay the requests */ 5325 ret = __i915_gem_restart_engines(dev_priv); 5326 if (ret) 5327 goto cleanup_uc; 5328 5329 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5330 5331 return 0; 5332 5333 cleanup_uc: 5334 intel_uc_fini_hw(dev_priv); 5335 out: 5336 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5337 5338 return ret; 5339 } 5340 5341 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5342 { 5343 struct i915_gem_context *ctx; 5344 struct intel_engine_cs *engine; 5345 enum intel_engine_id id; 5346 int err; 5347 5348 /* 5349 * As we reset the gpu during very early sanitisation, the current 5350 * register state on the GPU should reflect its defaults values. 5351 * We load a context onto the hw (with restore-inhibit), then switch 5352 * over to a second context to save that default register state. We 5353 * can then prime every new context with that state so they all start 5354 * from the same default HW values. 
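 *
 * Concretely, the sequence below is: create a throwaway kernel context,
 * submit an empty request (running engine->init_context()) on every
 * engine, switch back to i915->kernel_context, wait for the GPU to idle,
 * and then keep a reference to each engine's saved context image as
 * engine->default_state.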
5355 */ 5356 5357 ctx = i915_gem_context_create_kernel(i915, 0); 5358 if (IS_ERR(ctx)) 5359 return PTR_ERR(ctx); 5360 5361 for_each_engine(engine, i915, id) { 5362 struct i915_request *rq; 5363 5364 rq = i915_request_alloc(engine, ctx); 5365 if (IS_ERR(rq)) { 5366 err = PTR_ERR(rq); 5367 goto out_ctx; 5368 } 5369 5370 err = 0; 5371 if (engine->init_context) 5372 err = engine->init_context(rq); 5373 5374 i915_request_add(rq); 5375 if (err) 5376 goto err_active; 5377 } 5378 5379 err = i915_gem_switch_to_kernel_context(i915); 5380 if (err) 5381 goto err_active; 5382 5383 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5384 i915_gem_set_wedged(i915); 5385 err = -EIO; /* Caller will declare us wedged */ 5386 goto err_active; 5387 } 5388 5389 assert_kernel_context_is_current(i915); 5390 5391 for_each_engine(engine, i915, id) { 5392 struct i915_vma *state; 5393 5394 state = to_intel_context(ctx, engine)->state; 5395 if (!state) 5396 continue; 5397 5398 /* 5399 * As we will hold a reference to the logical state, it will 5400 * not be torn down with the context, and importantly the 5401 * object will hold onto its vma (making it possible for a 5402 * stray GTT write to corrupt our defaults). Unmap the vma 5403 * from the GTT to prevent such accidents and reclaim the 5404 * space. 5405 */ 5406 err = i915_vma_unbind(state); 5407 if (err) 5408 goto err_active; 5409 5410 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5411 if (err) 5412 goto err_active; 5413 5414 engine->default_state = i915_gem_object_get(state->obj); 5415 } 5416 5417 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5418 unsigned int found = intel_engines_has_context_isolation(i915); 5419 5420 /* 5421 * Make sure that classes with multiple engine instances all 5422 * share the same basic configuration. 5423 */ 5424 for_each_engine(engine, i915, id) { 5425 unsigned int bit = BIT(engine->uabi_class); 5426 unsigned int expected = engine->default_state ? bit : 0; 5427 5428 if ((found & bit) != expected) { 5429 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5430 engine->uabi_class, engine->name); 5431 } 5432 } 5433 } 5434 5435 out_ctx: 5436 i915_gem_context_set_closed(ctx); 5437 i915_gem_context_put(ctx); 5438 return err; 5439 5440 err_active: 5441 /* 5442 * If we have to abandon now, we expect the engines to be idle 5443 * and ready to be torn-down. First try to flush any remaining 5444 * request, ensure we are pointing at the kernel context and 5445 * then remove it. 5446 */ 5447 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5448 goto out_ctx; 5449 5450 if (WARN_ON(i915_gem_wait_for_idle(i915, 5451 I915_WAIT_LOCKED, 5452 MAX_SCHEDULE_TIMEOUT))) 5453 goto out_ctx; 5454 5455 i915_gem_contexts_lost(i915); 5456 goto out_ctx; 5457 } 5458 5459 int i915_gem_init(struct drm_i915_private *dev_priv) 5460 { 5461 int ret; 5462 5463 /* We need to fallback to 4K pages if host doesn't support huge gtt. 
*/ 5464 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5465 mkwrite_device_info(dev_priv)->page_sizes = 5466 I915_GTT_PAGE_SIZE_4K; 5467 5468 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5469 5470 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5471 dev_priv->gt.resume = intel_lr_context_resume; 5472 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5473 } else { 5474 dev_priv->gt.resume = intel_legacy_submission_resume; 5475 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5476 } 5477 5478 ret = i915_gem_init_userptr(dev_priv); 5479 if (ret) 5480 return ret; 5481 5482 ret = intel_uc_init_misc(dev_priv); 5483 if (ret) 5484 return ret; 5485 5486 ret = intel_wopcm_init(&dev_priv->wopcm); 5487 if (ret) 5488 goto err_uc_misc; 5489 5490 /* This is just a security blanket to placate dragons. 5491 * On some systems, we very sporadically observe that the first TLBs 5492 * used by the CS may be stale, despite us poking the TLB reset. If 5493 * we hold the forcewake during initialisation these problems 5494 * just magically go away. 5495 */ 5496 mutex_lock(&dev_priv->drm.struct_mutex); 5497 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5498 5499 ret = i915_gem_init_ggtt(dev_priv); 5500 if (ret) { 5501 GEM_BUG_ON(ret == -EIO); 5502 goto err_unlock; 5503 } 5504 5505 ret = i915_gem_contexts_init(dev_priv); 5506 if (ret) { 5507 GEM_BUG_ON(ret == -EIO); 5508 goto err_ggtt; 5509 } 5510 5511 ret = intel_engines_init(dev_priv); 5512 if (ret) { 5513 GEM_BUG_ON(ret == -EIO); 5514 goto err_context; 5515 } 5516 5517 intel_init_gt_powersave(dev_priv); 5518 5519 ret = intel_uc_init(dev_priv); 5520 if (ret) 5521 goto err_pm; 5522 5523 ret = i915_gem_init_hw(dev_priv); 5524 if (ret) 5525 goto err_uc_init; 5526 5527 /* 5528 * Despite its name intel_init_clock_gating applies both display 5529 * clock gating workarounds; GT mmio workarounds and the occasional 5530 * GT power context workaround. Worse, sometimes it includes a context 5531 * register workaround which we need to apply before we record the 5532 * default HW state for all contexts. 5533 * 5534 * FIXME: break up the workarounds and apply them at the right time! 5535 */ 5536 intel_init_clock_gating(dev_priv); 5537 5538 ret = __intel_engines_record_defaults(dev_priv); 5539 if (ret) 5540 goto err_init_hw; 5541 5542 if (i915_inject_load_failure()) { 5543 ret = -ENODEV; 5544 goto err_init_hw; 5545 } 5546 5547 if (i915_inject_load_failure()) { 5548 ret = -EIO; 5549 goto err_init_hw; 5550 } 5551 5552 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5553 mutex_unlock(&dev_priv->drm.struct_mutex); 5554 5555 return 0; 5556 5557 /* 5558 * Unwinding is complicated by that we want to handle -EIO to mean 5559 * disable GPU submission but keep KMS alive. We want to mark the 5560 * HW as irrevisibly wedged, but keep enough state around that the 5561 * driver doesn't explode during runtime. 
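 * (Hence the error labels below skip tearing down the engines and contexts
 * when ret == -EIO, and -EIO itself is converted to 0 at the end once the
 * GPU has been marked wedged.)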
5562 */ 5563 err_init_hw: 5564 mutex_unlock(&dev_priv->drm.struct_mutex); 5565 5566 WARN_ON(i915_gem_suspend(dev_priv)); 5567 i915_gem_suspend_late(dev_priv); 5568 5569 i915_gem_drain_workqueue(dev_priv); 5570 5571 mutex_lock(&dev_priv->drm.struct_mutex); 5572 intel_uc_fini_hw(dev_priv); 5573 err_uc_init: 5574 intel_uc_fini(dev_priv); 5575 err_pm: 5576 if (ret != -EIO) { 5577 intel_cleanup_gt_powersave(dev_priv); 5578 i915_gem_cleanup_engines(dev_priv); 5579 } 5580 err_context: 5581 if (ret != -EIO) 5582 i915_gem_contexts_fini(dev_priv); 5583 err_ggtt: 5584 err_unlock: 5585 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5586 mutex_unlock(&dev_priv->drm.struct_mutex); 5587 5588 err_uc_misc: 5589 intel_uc_fini_misc(dev_priv); 5590 5591 if (ret != -EIO) 5592 i915_gem_cleanup_userptr(dev_priv); 5593 5594 if (ret == -EIO) { 5595 /* 5596 * Allow engine initialisation to fail by marking the GPU as 5597 * wedged. But we only want to do this where the GPU is angry, 5598 * for all other failure, such as an allocation failure, bail. 5599 */ 5600 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5601 i915_load_error(dev_priv, 5602 "Failed to initialize GPU, declaring it wedged!\n"); 5603 i915_gem_set_wedged(dev_priv); 5604 } 5605 ret = 0; 5606 } 5607 5608 i915_gem_drain_freed_objects(dev_priv); 5609 return ret; 5610 } 5611 5612 void i915_gem_fini(struct drm_i915_private *dev_priv) 5613 { 5614 i915_gem_suspend_late(dev_priv); 5615 5616 /* Flush any outstanding unpin_work. */ 5617 i915_gem_drain_workqueue(dev_priv); 5618 5619 mutex_lock(&dev_priv->drm.struct_mutex); 5620 intel_uc_fini_hw(dev_priv); 5621 intel_uc_fini(dev_priv); 5622 i915_gem_cleanup_engines(dev_priv); 5623 i915_gem_contexts_fini(dev_priv); 5624 mutex_unlock(&dev_priv->drm.struct_mutex); 5625 5626 intel_uc_fini_misc(dev_priv); 5627 i915_gem_cleanup_userptr(dev_priv); 5628 5629 i915_gem_drain_freed_objects(dev_priv); 5630 5631 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5632 } 5633 5634 void i915_gem_init_mmio(struct drm_i915_private *i915) 5635 { 5636 i915_gem_sanitize(i915); 5637 } 5638 5639 void 5640 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5641 { 5642 struct intel_engine_cs *engine; 5643 enum intel_engine_id id; 5644 5645 for_each_engine(engine, dev_priv, id) 5646 dev_priv->gt.cleanup_engine(engine); 5647 } 5648 5649 void 5650 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5651 { 5652 int i; 5653 5654 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5655 !IS_CHERRYVIEW(dev_priv)) 5656 dev_priv->num_fence_regs = 32; 5657 else if (INTEL_GEN(dev_priv) >= 4 || 5658 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5659 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5660 dev_priv->num_fence_regs = 16; 5661 else 5662 dev_priv->num_fence_regs = 8; 5663 5664 if (intel_vgpu_active(dev_priv)) 5665 dev_priv->num_fence_regs = 5666 I915_READ(vgtif_reg(avail_rs.fence_num)); 5667 5668 /* Initialize fence registers to zero */ 5669 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5670 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5671 5672 fence->i915 = dev_priv; 5673 fence->id = i; 5674 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5675 } 5676 i915_gem_restore_fences(dev_priv); 5677 5678 i915_gem_detect_bit_6_swizzle(dev_priv); 5679 } 5680 5681 static void i915_gem_init__mm(struct drm_i915_private *i915) 5682 { 5683 spin_lock_init(&i915->mm.object_stat_lock); 5684 spin_lock_init(&i915->mm.obj_lock); 5685 spin_lock_init(&i915->mm.free_lock); 5686 5687 
init_llist_head(&i915->mm.free_list); 5688 5689 INIT_LIST_HEAD(&i915->mm.unbound_list); 5690 INIT_LIST_HEAD(&i915->mm.bound_list); 5691 INIT_LIST_HEAD(&i915->mm.fence_list); 5692 INIT_LIST_HEAD(&i915->mm.userfault_list); 5693 5694 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5695 } 5696 5697 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5698 { 5699 int err = -ENOMEM; 5700 5701 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5702 if (!dev_priv->objects) 5703 goto err_out; 5704 5705 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5706 if (!dev_priv->vmas) 5707 goto err_objects; 5708 5709 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5710 if (!dev_priv->luts) 5711 goto err_vmas; 5712 5713 dev_priv->requests = KMEM_CACHE(i915_request, 5714 SLAB_HWCACHE_ALIGN | 5715 SLAB_RECLAIM_ACCOUNT | 5716 SLAB_TYPESAFE_BY_RCU); 5717 if (!dev_priv->requests) 5718 goto err_luts; 5719 5720 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5721 SLAB_HWCACHE_ALIGN | 5722 SLAB_RECLAIM_ACCOUNT); 5723 if (!dev_priv->dependencies) 5724 goto err_requests; 5725 5726 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5727 if (!dev_priv->priorities) 5728 goto err_dependencies; 5729 5730 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5731 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5732 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5733 5734 i915_gem_init__mm(dev_priv); 5735 5736 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5737 i915_gem_retire_work_handler); 5738 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5739 i915_gem_idle_work_handler); 5740 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5741 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5742 5743 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5744 5745 spin_lock_init(&dev_priv->fb_tracking.lock); 5746 5747 err = i915_gemfs_init(dev_priv); 5748 if (err) 5749 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5750 5751 return 0; 5752 5753 err_dependencies: 5754 kmem_cache_destroy(dev_priv->dependencies); 5755 err_requests: 5756 kmem_cache_destroy(dev_priv->requests); 5757 err_luts: 5758 kmem_cache_destroy(dev_priv->luts); 5759 err_vmas: 5760 kmem_cache_destroy(dev_priv->vmas); 5761 err_objects: 5762 kmem_cache_destroy(dev_priv->objects); 5763 err_out: 5764 return err; 5765 } 5766 5767 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 5768 { 5769 i915_gem_drain_freed_objects(dev_priv); 5770 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5771 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5772 WARN_ON(dev_priv->mm.object_count); 5773 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5774 5775 kmem_cache_destroy(dev_priv->priorities); 5776 kmem_cache_destroy(dev_priv->dependencies); 5777 kmem_cache_destroy(dev_priv->requests); 5778 kmem_cache_destroy(dev_priv->luts); 5779 kmem_cache_destroy(dev_priv->vmas); 5780 kmem_cache_destroy(dev_priv->objects); 5781 5782 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5783 rcu_barrier(); 5784 5785 i915_gemfs_fini(dev_priv); 5786 } 5787 5788 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5789 { 5790 /* Discard all purgeable objects, let userspace recover those as 5791 * required after resuming. 
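 *
 * "Purgeable" here means buffers that userspace has marked
 * I915_MADV_DONTNEED via the madvise ioctl, e.g. (sketch; fd and handle
 * are placeholders):
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 * When marking the buffer I915_MADV_WILLNEED again, userspace checks
 * madv.retained (see i915_gem_madvise_ioctl() above) to learn whether the
 * old contents survived.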
int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try to reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}
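/*
 * Per-client setup for a new drm_file: allocate the drm_i915_file_private,
 * initialise its request list and lock, and open a GEM context for the
 * client. If context creation fails, the file_private is freed and the
 * error is propagated.
 */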
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
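/*
 * i915_gem_object_get_sg() returns the scatterlist entry covering page @n
 * of the object, with *offset set to that page's index within the entry.
 * The object's pages must already be pinned (see the GEM_BUG_ON below) so
 * the backing store cannot change while the iterator is in use. Typical
 * use mirrors i915_gem_object_get_page() further down:
 *
 *	sg = i915_gem_object_get_sg(obj, n, &offset);
 *	page = nth_page(sg_page(sg), offset);
 */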
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
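/*
 * Convert a shmem-backed object to the contiguous "phys" backing store
 * (i915_gem_phys_ops). The object is first unbound from the GTT; the
 * swap is refused if the object is no longer marked WILLNEED, is quirked,
 * or currently has a kernel mapping. On success the new physical pages
 * are pinned for the remainder of the object's life and the old shmem
 * pages are released.
 */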
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif