1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "intel_workarounds.h" 39 #include "i915_gemfs.h" 40 #include <linux/dma-fence-array.h> 41 #include <linux/kthread.h> 42 #include <linux/reservation.h> 43 #include <linux/shmem_fs.h> 44 #include <linux/slab.h> 45 #include <linux/stop_machine.h> 46 #include <linux/swap.h> 47 #include <linux/pci.h> 48 #include <linux/dma-buf.h> 49 50 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 51 52 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 53 { 54 if (obj->cache_dirty) 55 return false; 56 57 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 58 return true; 59 60 return obj->pin_global; /* currently in use by HW, keep flushed */ 61 } 62 63 static int 64 insert_mappable_node(struct i915_ggtt *ggtt, 65 struct drm_mm_node *node, u32 size) 66 { 67 memset(node, 0, sizeof(*node)); 68 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 69 size, 0, I915_COLOR_UNEVICTABLE, 70 0, ggtt->mappable_end, 71 DRM_MM_INSERT_LOW); 72 } 73 74 static void 75 remove_mappable_node(struct drm_mm_node *node) 76 { 77 drm_mm_remove_node(node); 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 u64 size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 u64 size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 might_sleep(); 105 106 /* 107 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 108 * userspace. If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 
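	 *
	 * Note on the return value handling below:
	 * wait_event_interruptible_timeout() returns 0 if the timeout elapsed,
	 * a negative error (e.g. -ERESTARTSYS) if a signal interrupted the
	 * wait, and the remaining jiffies otherwise, which is why 0 is turned
	 * into -EIO, negative values are passed straight through and anything
	 * positive is reported as success.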
110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 static u32 __i915_gem_park(struct drm_i915_private *i915) 141 { 142 GEM_TRACE("\n"); 143 144 lockdep_assert_held(&i915->drm.struct_mutex); 145 GEM_BUG_ON(i915->gt.active_requests); 146 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 147 148 if (!i915->gt.awake) 149 return I915_EPOCH_INVALID; 150 151 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 152 153 /* 154 * Be paranoid and flush a concurrent interrupt to make sure 155 * we don't reactivate any irq tasklets after parking. 156 * 157 * FIXME: Note that even though we have waited for execlists to be idle, 158 * there may still be an in-flight interrupt even though the CSB 159 * is now empty. synchronize_irq() makes sure that a residual interrupt 160 * is completed before we continue, but it doesn't prevent the HW from 161 * raising a spurious interrupt later. To complete the shield we should 162 * coordinate disabling the CS irq with flushing the interrupts. 163 */ 164 synchronize_irq(i915->drm.irq); 165 166 intel_engines_park(i915); 167 i915_timelines_park(i915); 168 169 i915_pmu_gt_parked(i915); 170 i915_vma_parked(i915); 171 172 i915->gt.awake = false; 173 174 if (INTEL_GEN(i915) >= 6) 175 gen6_rps_idle(i915); 176 177 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); 178 179 intel_runtime_pm_put(i915); 180 181 return i915->gt.epoch; 182 } 183 184 void i915_gem_park(struct drm_i915_private *i915) 185 { 186 GEM_TRACE("\n"); 187 188 lockdep_assert_held(&i915->drm.struct_mutex); 189 GEM_BUG_ON(i915->gt.active_requests); 190 191 if (!i915->gt.awake) 192 return; 193 194 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 195 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 196 } 197 198 void i915_gem_unpark(struct drm_i915_private *i915) 199 { 200 GEM_TRACE("\n"); 201 202 lockdep_assert_held(&i915->drm.struct_mutex); 203 GEM_BUG_ON(!i915->gt.active_requests); 204 205 if (i915->gt.awake) 206 return; 207 208 intel_runtime_pm_get_noresume(i915); 209 210 /* 211 * It seems that the DMC likes to transition between the DC states a lot 212 * when there are no connected displays (no active power domains) during 213 * command submission. 214 * 215 * This activity has negative impact on the performance of the chip with 216 * huge latencies observed in the interrupt handler and elsewhere. 217 * 218 * Work around it by grabbing a GT IRQ power domain whilst there is any 219 * GT activity, preventing any DC state transitions. 
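	 *
	 * The matching intel_display_power_put(POWER_DOMAIN_GT_IRQ) is in
	 * __i915_gem_park(), so the power domain is only held for as long as
	 * the GT is awake.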
220 */ 221 intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 222 223 i915->gt.awake = true; 224 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 225 i915->gt.epoch = 1; 226 227 intel_enable_gt_powersave(i915); 228 i915_update_gfx_val(i915); 229 if (INTEL_GEN(i915) >= 6) 230 gen6_rps_busy(i915); 231 i915_pmu_gt_unparked(i915); 232 233 intel_engines_unpark(i915); 234 235 i915_queue_hangcheck(i915); 236 237 queue_delayed_work(i915->wq, 238 &i915->gt.retire_work, 239 round_jiffies_up_relative(HZ)); 240 } 241 242 int 243 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 244 struct drm_file *file) 245 { 246 struct drm_i915_private *dev_priv = to_i915(dev); 247 struct i915_ggtt *ggtt = &dev_priv->ggtt; 248 struct drm_i915_gem_get_aperture *args = data; 249 struct i915_vma *vma; 250 u64 pinned; 251 252 pinned = ggtt->vm.reserved; 253 mutex_lock(&dev->struct_mutex); 254 list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) 255 if (i915_vma_is_pinned(vma)) 256 pinned += vma->node.size; 257 list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) 258 if (i915_vma_is_pinned(vma)) 259 pinned += vma->node.size; 260 mutex_unlock(&dev->struct_mutex); 261 262 args->aper_size = ggtt->vm.total; 263 args->aper_available_size = args->aper_size - pinned; 264 265 return 0; 266 } 267 268 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 269 { 270 struct address_space *mapping = obj->base.filp->f_mapping; 271 drm_dma_handle_t *phys; 272 struct sg_table *st; 273 struct scatterlist *sg; 274 char *vaddr; 275 int i; 276 int err; 277 278 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 279 return -EINVAL; 280 281 /* Always aligning to the object size, allows a single allocation 282 * to handle all possible callers, and given typical object sizes, 283 * the alignment of the buddy allocation will naturally match. 
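	 *
	 * For example (purely illustrative), a 24KiB object ends up backed by
	 * a single 32KiB allocation aligned to 32KiB, since
	 * roundup_pow_of_two() below is applied to both the size and the
	 * alignment passed to drm_pci_alloc().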
284 */ 285 phys = drm_pci_alloc(obj->base.dev, 286 roundup_pow_of_two(obj->base.size), 287 roundup_pow_of_two(obj->base.size)); 288 if (!phys) 289 return -ENOMEM; 290 291 vaddr = phys->vaddr; 292 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 293 struct page *page; 294 char *src; 295 296 page = shmem_read_mapping_page(mapping, i); 297 if (IS_ERR(page)) { 298 err = PTR_ERR(page); 299 goto err_phys; 300 } 301 302 src = kmap_atomic(page); 303 memcpy(vaddr, src, PAGE_SIZE); 304 drm_clflush_virt_range(vaddr, PAGE_SIZE); 305 kunmap_atomic(src); 306 307 put_page(page); 308 vaddr += PAGE_SIZE; 309 } 310 311 i915_gem_chipset_flush(to_i915(obj->base.dev)); 312 313 st = kmalloc(sizeof(*st), GFP_KERNEL); 314 if (!st) { 315 err = -ENOMEM; 316 goto err_phys; 317 } 318 319 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 320 kfree(st); 321 err = -ENOMEM; 322 goto err_phys; 323 } 324 325 sg = st->sgl; 326 sg->offset = 0; 327 sg->length = obj->base.size; 328 329 sg_dma_address(sg) = phys->busaddr; 330 sg_dma_len(sg) = obj->base.size; 331 332 obj->phys_handle = phys; 333 334 __i915_gem_object_set_pages(obj, st, sg->length); 335 336 return 0; 337 338 err_phys: 339 drm_pci_free(obj->base.dev, phys); 340 341 return err; 342 } 343 344 static void __start_cpu_write(struct drm_i915_gem_object *obj) 345 { 346 obj->read_domains = I915_GEM_DOMAIN_CPU; 347 obj->write_domain = I915_GEM_DOMAIN_CPU; 348 if (cpu_write_needs_clflush(obj)) 349 obj->cache_dirty = true; 350 } 351 352 static void 353 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 354 struct sg_table *pages, 355 bool needs_clflush) 356 { 357 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 358 359 if (obj->mm.madv == I915_MADV_DONTNEED) 360 obj->mm.dirty = false; 361 362 if (needs_clflush && 363 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 364 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 365 drm_clflush_sg(pages); 366 367 __start_cpu_write(obj); 368 } 369 370 static void 371 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 372 struct sg_table *pages) 373 { 374 __i915_gem_object_release_shmem(obj, pages, false); 375 376 if (obj->mm.dirty) { 377 struct address_space *mapping = obj->base.filp->f_mapping; 378 char *vaddr = obj->phys_handle->vaddr; 379 int i; 380 381 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 382 struct page *page; 383 char *dst; 384 385 page = shmem_read_mapping_page(mapping, i); 386 if (IS_ERR(page)) 387 continue; 388 389 dst = kmap_atomic(page); 390 drm_clflush_virt_range(vaddr, PAGE_SIZE); 391 memcpy(dst, vaddr, PAGE_SIZE); 392 kunmap_atomic(dst); 393 394 set_page_dirty(page); 395 if (obj->mm.madv == I915_MADV_WILLNEED) 396 mark_page_accessed(page); 397 put_page(page); 398 vaddr += PAGE_SIZE; 399 } 400 obj->mm.dirty = false; 401 } 402 403 sg_free_table(pages); 404 kfree(pages); 405 406 drm_pci_free(obj->base.dev, obj->phys_handle); 407 } 408 409 static void 410 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 411 { 412 i915_gem_object_unpin_pages(obj); 413 } 414 415 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 416 .get_pages = i915_gem_object_get_pages_phys, 417 .put_pages = i915_gem_object_put_pages_phys, 418 .release = i915_gem_object_release_phys, 419 }; 420 421 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 422 423 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 424 { 425 struct i915_vma *vma; 426 LIST_HEAD(still_in_list); 427 int ret; 428 429 lockdep_assert_held(&obj->base.dev->struct_mutex); 430 431 /* Closed vma are 
removed from the obj->vma_list - but they may 432 * still have an active binding on the object. To remove those we 433 * must wait for all rendering to complete to the object (as unbinding 434 * must anyway), and retire the requests. 435 */ 436 ret = i915_gem_object_set_to_cpu_domain(obj, false); 437 if (ret) 438 return ret; 439 440 while ((vma = list_first_entry_or_null(&obj->vma_list, 441 struct i915_vma, 442 obj_link))) { 443 list_move_tail(&vma->obj_link, &still_in_list); 444 ret = i915_vma_unbind(vma); 445 if (ret) 446 break; 447 } 448 list_splice(&still_in_list, &obj->vma_list); 449 450 return ret; 451 } 452 453 static long 454 i915_gem_object_wait_fence(struct dma_fence *fence, 455 unsigned int flags, 456 long timeout, 457 struct intel_rps_client *rps_client) 458 { 459 struct i915_request *rq; 460 461 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 462 463 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 464 return timeout; 465 466 if (!dma_fence_is_i915(fence)) 467 return dma_fence_wait_timeout(fence, 468 flags & I915_WAIT_INTERRUPTIBLE, 469 timeout); 470 471 rq = to_request(fence); 472 if (i915_request_completed(rq)) 473 goto out; 474 475 /* 476 * This client is about to stall waiting for the GPU. In many cases 477 * this is undesirable and limits the throughput of the system, as 478 * many clients cannot continue processing user input/output whilst 479 * blocked. RPS autotuning may take tens of milliseconds to respond 480 * to the GPU load and thus incurs additional latency for the client. 481 * We can circumvent that by promoting the GPU frequency to maximum 482 * before we wait. This makes the GPU throttle up much more quickly 483 * (good for benchmarks and user experience, e.g. window animations), 484 * but at a cost of spending more power processing the workload 485 * (bad for battery). Not all clients even want their results 486 * immediately and for them we should just let the GPU select its own 487 * frequency to maximise efficiency. To prevent a single client from 488 * forcing the clocks too high for the whole system, we only allow 489 * each client to waitboost once in a busy period. 490 */ 491 if (rps_client && !i915_request_started(rq)) { 492 if (INTEL_GEN(rq->i915) >= 6) 493 gen6_rps_boost(rq, rps_client); 494 } 495 496 timeout = i915_request_wait(rq, flags, timeout); 497 498 out: 499 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 500 i915_request_retire_upto(rq); 501 502 return timeout; 503 } 504 505 static long 506 i915_gem_object_wait_reservation(struct reservation_object *resv, 507 unsigned int flags, 508 long timeout, 509 struct intel_rps_client *rps_client) 510 { 511 unsigned int seq = __read_seqcount_begin(&resv->seq); 512 struct dma_fence *excl; 513 bool prune_fences = false; 514 515 if (flags & I915_WAIT_ALL) { 516 struct dma_fence **shared; 517 unsigned int count, i; 518 int ret; 519 520 ret = reservation_object_get_fences_rcu(resv, 521 &excl, &count, &shared); 522 if (ret) 523 return ret; 524 525 for (i = 0; i < count; i++) { 526 timeout = i915_gem_object_wait_fence(shared[i], 527 flags, timeout, 528 rps_client); 529 if (timeout < 0) 530 break; 531 532 dma_fence_put(shared[i]); 533 } 534 535 for (; i < count; i++) 536 dma_fence_put(shared[i]); 537 kfree(shared); 538 539 /* 540 * If both shared fences and an exclusive fence exist, 541 * then by construction the shared fences must be later 542 * than the exclusive fence. If we successfully wait for 543 * all the shared fences, we know that the exclusive fence 544 * must all be signaled. 
If all the shared fences are 545 * signaled, we can prune the array and recover the 546 * floating references on the fences/requests. 547 */ 548 prune_fences = count && timeout >= 0; 549 } else { 550 excl = reservation_object_get_excl_rcu(resv); 551 } 552 553 if (excl && timeout >= 0) 554 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 555 rps_client); 556 557 dma_fence_put(excl); 558 559 /* 560 * Opportunistically prune the fences iff we know they have *all* been 561 * signaled and that the reservation object has not been changed (i.e. 562 * no new fences have been added). 563 */ 564 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 565 if (reservation_object_trylock(resv)) { 566 if (!__read_seqcount_retry(&resv->seq, seq)) 567 reservation_object_add_excl_fence(resv, NULL); 568 reservation_object_unlock(resv); 569 } 570 } 571 572 return timeout; 573 } 574 575 static void __fence_set_priority(struct dma_fence *fence, 576 const struct i915_sched_attr *attr) 577 { 578 struct i915_request *rq; 579 struct intel_engine_cs *engine; 580 581 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 582 return; 583 584 rq = to_request(fence); 585 engine = rq->engine; 586 587 local_bh_disable(); 588 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 589 if (engine->schedule) 590 engine->schedule(rq, attr); 591 rcu_read_unlock(); 592 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 593 } 594 595 static void fence_set_priority(struct dma_fence *fence, 596 const struct i915_sched_attr *attr) 597 { 598 /* Recurse once into a fence-array */ 599 if (dma_fence_is_array(fence)) { 600 struct dma_fence_array *array = to_dma_fence_array(fence); 601 int i; 602 603 for (i = 0; i < array->num_fences; i++) 604 __fence_set_priority(array->fences[i], attr); 605 } else { 606 __fence_set_priority(fence, attr); 607 } 608 } 609 610 int 611 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 612 unsigned int flags, 613 const struct i915_sched_attr *attr) 614 { 615 struct dma_fence *excl; 616 617 if (flags & I915_WAIT_ALL) { 618 struct dma_fence **shared; 619 unsigned int count, i; 620 int ret; 621 622 ret = reservation_object_get_fences_rcu(obj->resv, 623 &excl, &count, &shared); 624 if (ret) 625 return ret; 626 627 for (i = 0; i < count; i++) { 628 fence_set_priority(shared[i], attr); 629 dma_fence_put(shared[i]); 630 } 631 632 kfree(shared); 633 } else { 634 excl = reservation_object_get_excl_rcu(obj->resv); 635 } 636 637 if (excl) { 638 fence_set_priority(excl, attr); 639 dma_fence_put(excl); 640 } 641 return 0; 642 } 643 644 /** 645 * Waits for rendering to the object to be completed 646 * @obj: i915 gem object 647 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 648 * @timeout: how long to wait 649 * @rps_client: client (user process) to charge for any waitboosting 650 */ 651 int 652 i915_gem_object_wait(struct drm_i915_gem_object *obj, 653 unsigned int flags, 654 long timeout, 655 struct intel_rps_client *rps_client) 656 { 657 might_sleep(); 658 #if IS_ENABLED(CONFIG_LOCKDEP) 659 GEM_BUG_ON(debug_locks && 660 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 661 !!(flags & I915_WAIT_LOCKED)); 662 #endif 663 GEM_BUG_ON(timeout < 0); 664 665 timeout = i915_gem_object_wait_reservation(obj->resv, 666 flags, timeout, 667 rps_client); 668 return timeout < 0 ? 
timeout : 0; 669 } 670 671 static struct intel_rps_client *to_rps_client(struct drm_file *file) 672 { 673 struct drm_i915_file_private *fpriv = file->driver_priv; 674 675 return &fpriv->rps_client; 676 } 677 678 static int 679 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 680 struct drm_i915_gem_pwrite *args, 681 struct drm_file *file) 682 { 683 void *vaddr = obj->phys_handle->vaddr + args->offset; 684 char __user *user_data = u64_to_user_ptr(args->data_ptr); 685 686 /* We manually control the domain here and pretend that it 687 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 688 */ 689 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 690 if (copy_from_user(vaddr, user_data, args->size)) 691 return -EFAULT; 692 693 drm_clflush_virt_range(vaddr, args->size); 694 i915_gem_chipset_flush(to_i915(obj->base.dev)); 695 696 intel_fb_obj_flush(obj, ORIGIN_CPU); 697 return 0; 698 } 699 700 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 701 { 702 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 703 } 704 705 void i915_gem_object_free(struct drm_i915_gem_object *obj) 706 { 707 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 708 kmem_cache_free(dev_priv->objects, obj); 709 } 710 711 static int 712 i915_gem_create(struct drm_file *file, 713 struct drm_i915_private *dev_priv, 714 uint64_t size, 715 uint32_t *handle_p) 716 { 717 struct drm_i915_gem_object *obj; 718 int ret; 719 u32 handle; 720 721 size = roundup(size, PAGE_SIZE); 722 if (size == 0) 723 return -EINVAL; 724 725 /* Allocate the new object */ 726 obj = i915_gem_object_create(dev_priv, size); 727 if (IS_ERR(obj)) 728 return PTR_ERR(obj); 729 730 ret = drm_gem_handle_create(file, &obj->base, &handle); 731 /* drop reference from allocate - handle holds it now */ 732 i915_gem_object_put(obj); 733 if (ret) 734 return ret; 735 736 *handle_p = handle; 737 return 0; 738 } 739 740 int 741 i915_gem_dumb_create(struct drm_file *file, 742 struct drm_device *dev, 743 struct drm_mode_create_dumb *args) 744 { 745 /* have to work out size/pitch and return them */ 746 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 747 args->size = args->pitch * args->height; 748 return i915_gem_create(file, to_i915(dev), 749 args->size, &args->handle); 750 } 751 752 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 753 { 754 return !(obj->cache_level == I915_CACHE_NONE || 755 obj->cache_level == I915_CACHE_WT); 756 } 757 758 /** 759 * Creates a new mm object and returns a handle to it. 760 * @dev: drm device pointer 761 * @data: ioctl data blob 762 * @file: drm file pointer 763 */ 764 int 765 i915_gem_create_ioctl(struct drm_device *dev, void *data, 766 struct drm_file *file) 767 { 768 struct drm_i915_private *dev_priv = to_i915(dev); 769 struct drm_i915_gem_create *args = data; 770 771 i915_gem_flush_free_objects(dev_priv); 772 773 return i915_gem_create(file, dev_priv, 774 args->size, &args->handle); 775 } 776 777 static inline enum fb_op_origin 778 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 779 { 780 return (domain == I915_GEM_DOMAIN_GTT ? 781 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 782 } 783 784 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 785 { 786 /* 787 * No actual flushing is required for the GTT write domain for reads 788 * from the GTT domain. Writes to it "immediately" go to main memory 789 * as far as we know, so there's no chipset flush. It also doesn't 790 * land in the GPU render cache. 
791 * 792 * However, we do have to enforce the order so that all writes through 793 * the GTT land before any writes to the device, such as updates to 794 * the GATT itself. 795 * 796 * We also have to wait a bit for the writes to land from the GTT. 797 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 798 * timing. This issue has only been observed when switching quickly 799 * between GTT writes and CPU reads from inside the kernel on recent hw, 800 * and it appears to only affect discrete GTT blocks (i.e. on LLC 801 * system agents we cannot reproduce this behaviour, until Cannonlake 802 * that was!). 803 */ 804 805 wmb(); 806 807 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 808 return; 809 810 i915_gem_chipset_flush(dev_priv); 811 812 intel_runtime_pm_get(dev_priv); 813 spin_lock_irq(&dev_priv->uncore.lock); 814 815 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 816 817 spin_unlock_irq(&dev_priv->uncore.lock); 818 intel_runtime_pm_put(dev_priv); 819 } 820 821 static void 822 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 823 { 824 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 825 struct i915_vma *vma; 826 827 if (!(obj->write_domain & flush_domains)) 828 return; 829 830 switch (obj->write_domain) { 831 case I915_GEM_DOMAIN_GTT: 832 i915_gem_flush_ggtt_writes(dev_priv); 833 834 intel_fb_obj_flush(obj, 835 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 836 837 for_each_ggtt_vma(vma, obj) { 838 if (vma->iomap) 839 continue; 840 841 i915_vma_unset_ggtt_write(vma); 842 } 843 break; 844 845 case I915_GEM_DOMAIN_WC: 846 wmb(); 847 break; 848 849 case I915_GEM_DOMAIN_CPU: 850 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 851 break; 852 853 case I915_GEM_DOMAIN_RENDER: 854 if (gpu_write_needs_clflush(obj)) 855 obj->cache_dirty = true; 856 break; 857 } 858 859 obj->write_domain = 0; 860 } 861 862 static inline int 863 __copy_to_user_swizzled(char __user *cpu_vaddr, 864 const char *gpu_vaddr, int gpu_offset, 865 int length) 866 { 867 int ret, cpu_offset = 0; 868 869 while (length > 0) { 870 int cacheline_end = ALIGN(gpu_offset + 1, 64); 871 int this_length = min(cacheline_end - gpu_offset, length); 872 int swizzled_gpu_offset = gpu_offset ^ 64; 873 874 ret = __copy_to_user(cpu_vaddr + cpu_offset, 875 gpu_vaddr + swizzled_gpu_offset, 876 this_length); 877 if (ret) 878 return ret + length; 879 880 cpu_offset += this_length; 881 gpu_offset += this_length; 882 length -= this_length; 883 } 884 885 return 0; 886 } 887 888 static inline int 889 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 890 const char __user *cpu_vaddr, 891 int length) 892 { 893 int ret, cpu_offset = 0; 894 895 while (length > 0) { 896 int cacheline_end = ALIGN(gpu_offset + 1, 64); 897 int this_length = min(cacheline_end - gpu_offset, length); 898 int swizzled_gpu_offset = gpu_offset ^ 64; 899 900 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 901 cpu_vaddr + cpu_offset, 902 this_length); 903 if (ret) 904 return ret + length; 905 906 cpu_offset += this_length; 907 gpu_offset += this_length; 908 length -= this_length; 909 } 910 911 return 0; 912 } 913 914 /* 915 * Pins the specified object's pages and synchronizes the object with 916 * GPU accesses. Sets needs_clflush to non-zero if the caller should 917 * flush the object from the CPU cache. 
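 *
 * A sketch of the intended calling pattern (mirroring what
 * i915_gem_shmem_pread() below does, error handling omitted):
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy out of the object's pages, honouring needs_clflush ...
 *	i915_gem_obj_finish_shmem_access(obj);
 *
 * The object is returned with its pages pinned; the finish call drops
 * that pin again.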
918 */ 919 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 920 unsigned int *needs_clflush) 921 { 922 int ret; 923 924 lockdep_assert_held(&obj->base.dev->struct_mutex); 925 926 *needs_clflush = 0; 927 if (!i915_gem_object_has_struct_page(obj)) 928 return -ENODEV; 929 930 ret = i915_gem_object_wait(obj, 931 I915_WAIT_INTERRUPTIBLE | 932 I915_WAIT_LOCKED, 933 MAX_SCHEDULE_TIMEOUT, 934 NULL); 935 if (ret) 936 return ret; 937 938 ret = i915_gem_object_pin_pages(obj); 939 if (ret) 940 return ret; 941 942 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 943 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 944 ret = i915_gem_object_set_to_cpu_domain(obj, false); 945 if (ret) 946 goto err_unpin; 947 else 948 goto out; 949 } 950 951 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 952 953 /* If we're not in the cpu read domain, set ourself into the gtt 954 * read domain and manually flush cachelines (if required). This 955 * optimizes for the case when the gpu will dirty the data 956 * anyway again before the next pread happens. 957 */ 958 if (!obj->cache_dirty && 959 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 960 *needs_clflush = CLFLUSH_BEFORE; 961 962 out: 963 /* return with the pages pinned */ 964 return 0; 965 966 err_unpin: 967 i915_gem_object_unpin_pages(obj); 968 return ret; 969 } 970 971 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 972 unsigned int *needs_clflush) 973 { 974 int ret; 975 976 lockdep_assert_held(&obj->base.dev->struct_mutex); 977 978 *needs_clflush = 0; 979 if (!i915_gem_object_has_struct_page(obj)) 980 return -ENODEV; 981 982 ret = i915_gem_object_wait(obj, 983 I915_WAIT_INTERRUPTIBLE | 984 I915_WAIT_LOCKED | 985 I915_WAIT_ALL, 986 MAX_SCHEDULE_TIMEOUT, 987 NULL); 988 if (ret) 989 return ret; 990 991 ret = i915_gem_object_pin_pages(obj); 992 if (ret) 993 return ret; 994 995 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 996 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 997 ret = i915_gem_object_set_to_cpu_domain(obj, true); 998 if (ret) 999 goto err_unpin; 1000 else 1001 goto out; 1002 } 1003 1004 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 1005 1006 /* If we're not in the cpu write domain, set ourself into the 1007 * gtt write domain and manually flush cachelines (as required). 1008 * This optimizes for the case when the gpu will use the data 1009 * right away and we therefore have to clflush anyway. 1010 */ 1011 if (!obj->cache_dirty) { 1012 *needs_clflush |= CLFLUSH_AFTER; 1013 1014 /* 1015 * Same trick applies to invalidate partially written 1016 * cachelines read before writing. 1017 */ 1018 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 1019 *needs_clflush |= CLFLUSH_BEFORE; 1020 } 1021 1022 out: 1023 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1024 obj->mm.dirty = true; 1025 /* return with the pages pinned */ 1026 return 0; 1027 1028 err_unpin: 1029 i915_gem_object_unpin_pages(obj); 1030 return ret; 1031 } 1032 1033 static void 1034 shmem_clflush_swizzled_range(char *addr, unsigned long length, 1035 bool swizzled) 1036 { 1037 if (unlikely(swizzled)) { 1038 unsigned long start = (unsigned long) addr; 1039 unsigned long end = (unsigned long) addr + length; 1040 1041 /* For swizzling simply ensure that we always flush both 1042 * channels. Lame, but simple and it works. Swizzled 1043 * pwrite/pread is far from a hotpath - current userspace 1044 * doesn't use it at all. 
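		 *
		 * Rounding to 128 bytes below ensures that for every 64-byte
		 * cacheline we touch, its swizzled partner (offset ^ 64, see
		 * __copy_to_user_swizzled()) is flushed as well.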
*/ 1045 start = round_down(start, 128); 1046 end = round_up(end, 128); 1047 1048 drm_clflush_virt_range((void *)start, end - start); 1049 } else { 1050 drm_clflush_virt_range(addr, length); 1051 } 1052 1053 } 1054 1055 /* Only difference to the fast-path function is that this can handle bit17 1056 * and uses non-atomic copy and kmap functions. */ 1057 static int 1058 shmem_pread_slow(struct page *page, int offset, int length, 1059 char __user *user_data, 1060 bool page_do_bit17_swizzling, bool needs_clflush) 1061 { 1062 char *vaddr; 1063 int ret; 1064 1065 vaddr = kmap(page); 1066 if (needs_clflush) 1067 shmem_clflush_swizzled_range(vaddr + offset, length, 1068 page_do_bit17_swizzling); 1069 1070 if (page_do_bit17_swizzling) 1071 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 1072 else 1073 ret = __copy_to_user(user_data, vaddr + offset, length); 1074 kunmap(page); 1075 1076 return ret ? - EFAULT : 0; 1077 } 1078 1079 static int 1080 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 1081 bool page_do_bit17_swizzling, bool needs_clflush) 1082 { 1083 int ret; 1084 1085 ret = -ENODEV; 1086 if (!page_do_bit17_swizzling) { 1087 char *vaddr = kmap_atomic(page); 1088 1089 if (needs_clflush) 1090 drm_clflush_virt_range(vaddr + offset, length); 1091 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1092 kunmap_atomic(vaddr); 1093 } 1094 if (ret == 0) 1095 return 0; 1096 1097 return shmem_pread_slow(page, offset, length, user_data, 1098 page_do_bit17_swizzling, needs_clflush); 1099 } 1100 1101 static int 1102 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1103 struct drm_i915_gem_pread *args) 1104 { 1105 char __user *user_data; 1106 u64 remain; 1107 unsigned int obj_do_bit17_swizzling; 1108 unsigned int needs_clflush; 1109 unsigned int idx, offset; 1110 int ret; 1111 1112 obj_do_bit17_swizzling = 0; 1113 if (i915_gem_object_needs_bit17_swizzle(obj)) 1114 obj_do_bit17_swizzling = BIT(17); 1115 1116 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1117 if (ret) 1118 return ret; 1119 1120 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1121 mutex_unlock(&obj->base.dev->struct_mutex); 1122 if (ret) 1123 return ret; 1124 1125 remain = args->size; 1126 user_data = u64_to_user_ptr(args->data_ptr); 1127 offset = offset_in_page(args->offset); 1128 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1129 struct page *page = i915_gem_object_get_page(obj, idx); 1130 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1131 1132 ret = shmem_pread(page, offset, length, user_data, 1133 page_to_phys(page) & obj_do_bit17_swizzling, 1134 needs_clflush); 1135 if (ret) 1136 break; 1137 1138 remain -= length; 1139 user_data += length; 1140 offset = 0; 1141 } 1142 1143 i915_gem_obj_finish_shmem_access(obj); 1144 return ret; 1145 } 1146 1147 static inline bool 1148 gtt_user_read(struct io_mapping *mapping, 1149 loff_t base, int offset, 1150 char __user *user_data, int length) 1151 { 1152 void __iomem *vaddr; 1153 unsigned long unwritten; 1154 1155 /* We can use the cpu mem copy function because this is X86. 
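	 *
	 * The atomic WC mapping is tried first; if the user buffer is not
	 * resident, the atomic copy fails without faulting and we retry with
	 * a regular, sleepable io_mapping_map_wc() mapping below.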
*/ 1156 vaddr = io_mapping_map_atomic_wc(mapping, base); 1157 unwritten = __copy_to_user_inatomic(user_data, 1158 (void __force *)vaddr + offset, 1159 length); 1160 io_mapping_unmap_atomic(vaddr); 1161 if (unwritten) { 1162 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1163 unwritten = copy_to_user(user_data, 1164 (void __force *)vaddr + offset, 1165 length); 1166 io_mapping_unmap(vaddr); 1167 } 1168 return unwritten; 1169 } 1170 1171 static int 1172 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1173 const struct drm_i915_gem_pread *args) 1174 { 1175 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1176 struct i915_ggtt *ggtt = &i915->ggtt; 1177 struct drm_mm_node node; 1178 struct i915_vma *vma; 1179 void __user *user_data; 1180 u64 remain, offset; 1181 int ret; 1182 1183 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1184 if (ret) 1185 return ret; 1186 1187 intel_runtime_pm_get(i915); 1188 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1189 PIN_MAPPABLE | 1190 PIN_NONFAULT | 1191 PIN_NONBLOCK); 1192 if (!IS_ERR(vma)) { 1193 node.start = i915_ggtt_offset(vma); 1194 node.allocated = false; 1195 ret = i915_vma_put_fence(vma); 1196 if (ret) { 1197 i915_vma_unpin(vma); 1198 vma = ERR_PTR(ret); 1199 } 1200 } 1201 if (IS_ERR(vma)) { 1202 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1203 if (ret) 1204 goto out_unlock; 1205 GEM_BUG_ON(!node.allocated); 1206 } 1207 1208 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1209 if (ret) 1210 goto out_unpin; 1211 1212 mutex_unlock(&i915->drm.struct_mutex); 1213 1214 user_data = u64_to_user_ptr(args->data_ptr); 1215 remain = args->size; 1216 offset = args->offset; 1217 1218 while (remain > 0) { 1219 /* Operation in this page 1220 * 1221 * page_base = page offset within aperture 1222 * page_offset = offset within page 1223 * page_length = bytes to copy for this page 1224 */ 1225 u32 page_base = node.start; 1226 unsigned page_offset = offset_in_page(offset); 1227 unsigned page_length = PAGE_SIZE - page_offset; 1228 page_length = remain < page_length ? remain : page_length; 1229 if (node.allocated) { 1230 wmb(); 1231 ggtt->vm.insert_page(&ggtt->vm, 1232 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1233 node.start, I915_CACHE_NONE, 0); 1234 wmb(); 1235 } else { 1236 page_base += offset & PAGE_MASK; 1237 } 1238 1239 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1240 user_data, page_length)) { 1241 ret = -EFAULT; 1242 break; 1243 } 1244 1245 remain -= page_length; 1246 user_data += page_length; 1247 offset += page_length; 1248 } 1249 1250 mutex_lock(&i915->drm.struct_mutex); 1251 out_unpin: 1252 if (node.allocated) { 1253 wmb(); 1254 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1255 remove_mappable_node(&node); 1256 } else { 1257 i915_vma_unpin(vma); 1258 } 1259 out_unlock: 1260 intel_runtime_pm_put(i915); 1261 mutex_unlock(&i915->drm.struct_mutex); 1262 1263 return ret; 1264 } 1265 1266 /** 1267 * Reads data from the object referenced by handle. 1268 * @dev: drm device pointer 1269 * @data: ioctl data blob 1270 * @file: drm file pointer 1271 * 1272 * On error, the contents of *data are undefined. 
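 *
 * Purely illustrative userspace usage (error handling omitted):
 *
 *	struct drm_i915_gem_pread pread = {
 *		.handle   = handle,		/* from e.g. GEM_CREATE */
 *		.offset   = 0,			/* byte offset into the object */
 *		.size     = length,		/* bytes to read */
 *		.data_ptr = (uintptr_t)buf,	/* destination buffer */
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);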
1273 */ 1274 int 1275 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1276 struct drm_file *file) 1277 { 1278 struct drm_i915_gem_pread *args = data; 1279 struct drm_i915_gem_object *obj; 1280 int ret; 1281 1282 if (args->size == 0) 1283 return 0; 1284 1285 if (!access_ok(u64_to_user_ptr(args->data_ptr), 1286 args->size)) 1287 return -EFAULT; 1288 1289 obj = i915_gem_object_lookup(file, args->handle); 1290 if (!obj) 1291 return -ENOENT; 1292 1293 /* Bounds check source. */ 1294 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1295 ret = -EINVAL; 1296 goto out; 1297 } 1298 1299 trace_i915_gem_object_pread(obj, args->offset, args->size); 1300 1301 ret = i915_gem_object_wait(obj, 1302 I915_WAIT_INTERRUPTIBLE, 1303 MAX_SCHEDULE_TIMEOUT, 1304 to_rps_client(file)); 1305 if (ret) 1306 goto out; 1307 1308 ret = i915_gem_object_pin_pages(obj); 1309 if (ret) 1310 goto out; 1311 1312 ret = i915_gem_shmem_pread(obj, args); 1313 if (ret == -EFAULT || ret == -ENODEV) 1314 ret = i915_gem_gtt_pread(obj, args); 1315 1316 i915_gem_object_unpin_pages(obj); 1317 out: 1318 i915_gem_object_put(obj); 1319 return ret; 1320 } 1321 1322 /* This is the fast write path which cannot handle 1323 * page faults in the source data 1324 */ 1325 1326 static inline bool 1327 ggtt_write(struct io_mapping *mapping, 1328 loff_t base, int offset, 1329 char __user *user_data, int length) 1330 { 1331 void __iomem *vaddr; 1332 unsigned long unwritten; 1333 1334 /* We can use the cpu mem copy function because this is X86. */ 1335 vaddr = io_mapping_map_atomic_wc(mapping, base); 1336 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1337 user_data, length); 1338 io_mapping_unmap_atomic(vaddr); 1339 if (unwritten) { 1340 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1341 unwritten = copy_from_user((void __force *)vaddr + offset, 1342 user_data, length); 1343 io_mapping_unmap(vaddr); 1344 } 1345 1346 return unwritten; 1347 } 1348 1349 /** 1350 * This is the fast pwrite path, where we copy the data directly from the 1351 * user into the GTT, uncached. 1352 * @obj: i915 GEM object 1353 * @args: pwrite arguments structure 1354 */ 1355 static int 1356 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1357 const struct drm_i915_gem_pwrite *args) 1358 { 1359 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1360 struct i915_ggtt *ggtt = &i915->ggtt; 1361 struct drm_mm_node node; 1362 struct i915_vma *vma; 1363 u64 remain, offset; 1364 void __user *user_data; 1365 int ret; 1366 1367 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1368 if (ret) 1369 return ret; 1370 1371 if (i915_gem_object_has_struct_page(obj)) { 1372 /* 1373 * Avoid waking the device up if we can fallback, as 1374 * waking/resuming is very slow (worst-case 10-100 ms 1375 * depending on PCI sleeps and our own resume time). 1376 * This easily dwarfs any performance advantage from 1377 * using the cache bypass of indirect GGTT access. 
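		 *
		 * Returning -EFAULT here is what makes the caller,
		 * i915_gem_pwrite_ioctl(), fall back to the shmem (or phys)
		 * pwrite path instead of waking the device.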
1378 */ 1379 if (!intel_runtime_pm_get_if_in_use(i915)) { 1380 ret = -EFAULT; 1381 goto out_unlock; 1382 } 1383 } else { 1384 /* No backing pages, no fallback, we must force GGTT access */ 1385 intel_runtime_pm_get(i915); 1386 } 1387 1388 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1389 PIN_MAPPABLE | 1390 PIN_NONFAULT | 1391 PIN_NONBLOCK); 1392 if (!IS_ERR(vma)) { 1393 node.start = i915_ggtt_offset(vma); 1394 node.allocated = false; 1395 ret = i915_vma_put_fence(vma); 1396 if (ret) { 1397 i915_vma_unpin(vma); 1398 vma = ERR_PTR(ret); 1399 } 1400 } 1401 if (IS_ERR(vma)) { 1402 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1403 if (ret) 1404 goto out_rpm; 1405 GEM_BUG_ON(!node.allocated); 1406 } 1407 1408 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1409 if (ret) 1410 goto out_unpin; 1411 1412 mutex_unlock(&i915->drm.struct_mutex); 1413 1414 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1415 1416 user_data = u64_to_user_ptr(args->data_ptr); 1417 offset = args->offset; 1418 remain = args->size; 1419 while (remain) { 1420 /* Operation in this page 1421 * 1422 * page_base = page offset within aperture 1423 * page_offset = offset within page 1424 * page_length = bytes to copy for this page 1425 */ 1426 u32 page_base = node.start; 1427 unsigned int page_offset = offset_in_page(offset); 1428 unsigned int page_length = PAGE_SIZE - page_offset; 1429 page_length = remain < page_length ? remain : page_length; 1430 if (node.allocated) { 1431 wmb(); /* flush the write before we modify the GGTT */ 1432 ggtt->vm.insert_page(&ggtt->vm, 1433 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1434 node.start, I915_CACHE_NONE, 0); 1435 wmb(); /* flush modifications to the GGTT (insert_page) */ 1436 } else { 1437 page_base += offset & PAGE_MASK; 1438 } 1439 /* If we get a fault while copying data, then (presumably) our 1440 * source page isn't available. Return the error and we'll 1441 * retry in the slow path. 1442 * If the object is non-shmem backed, we retry again with the 1443 * path that handles page fault. 1444 */ 1445 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1446 user_data, page_length)) { 1447 ret = -EFAULT; 1448 break; 1449 } 1450 1451 remain -= page_length; 1452 user_data += page_length; 1453 offset += page_length; 1454 } 1455 intel_fb_obj_flush(obj, ORIGIN_CPU); 1456 1457 mutex_lock(&i915->drm.struct_mutex); 1458 out_unpin: 1459 if (node.allocated) { 1460 wmb(); 1461 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1462 remove_mappable_node(&node); 1463 } else { 1464 i915_vma_unpin(vma); 1465 } 1466 out_rpm: 1467 intel_runtime_pm_put(i915); 1468 out_unlock: 1469 mutex_unlock(&i915->drm.struct_mutex); 1470 return ret; 1471 } 1472 1473 static int 1474 shmem_pwrite_slow(struct page *page, int offset, int length, 1475 char __user *user_data, 1476 bool page_do_bit17_swizzling, 1477 bool needs_clflush_before, 1478 bool needs_clflush_after) 1479 { 1480 char *vaddr; 1481 int ret; 1482 1483 vaddr = kmap(page); 1484 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1485 shmem_clflush_swizzled_range(vaddr + offset, length, 1486 page_do_bit17_swizzling); 1487 if (page_do_bit17_swizzling) 1488 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1489 length); 1490 else 1491 ret = __copy_from_user(vaddr + offset, user_data, length); 1492 if (needs_clflush_after) 1493 shmem_clflush_swizzled_range(vaddr + offset, length, 1494 page_do_bit17_swizzling); 1495 kunmap(page); 1496 1497 return ret ? 
-EFAULT : 0; 1498 } 1499 1500 /* Per-page copy function for the shmem pwrite fastpath. 1501 * Flushes invalid cachelines before writing to the target if 1502 * needs_clflush_before is set and flushes out any written cachelines after 1503 * writing if needs_clflush is set. 1504 */ 1505 static int 1506 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1507 bool page_do_bit17_swizzling, 1508 bool needs_clflush_before, 1509 bool needs_clflush_after) 1510 { 1511 int ret; 1512 1513 ret = -ENODEV; 1514 if (!page_do_bit17_swizzling) { 1515 char *vaddr = kmap_atomic(page); 1516 1517 if (needs_clflush_before) 1518 drm_clflush_virt_range(vaddr + offset, len); 1519 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1520 if (needs_clflush_after) 1521 drm_clflush_virt_range(vaddr + offset, len); 1522 1523 kunmap_atomic(vaddr); 1524 } 1525 if (ret == 0) 1526 return ret; 1527 1528 return shmem_pwrite_slow(page, offset, len, user_data, 1529 page_do_bit17_swizzling, 1530 needs_clflush_before, 1531 needs_clflush_after); 1532 } 1533 1534 static int 1535 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1536 const struct drm_i915_gem_pwrite *args) 1537 { 1538 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1539 void __user *user_data; 1540 u64 remain; 1541 unsigned int obj_do_bit17_swizzling; 1542 unsigned int partial_cacheline_write; 1543 unsigned int needs_clflush; 1544 unsigned int offset, idx; 1545 int ret; 1546 1547 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1548 if (ret) 1549 return ret; 1550 1551 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1552 mutex_unlock(&i915->drm.struct_mutex); 1553 if (ret) 1554 return ret; 1555 1556 obj_do_bit17_swizzling = 0; 1557 if (i915_gem_object_needs_bit17_swizzle(obj)) 1558 obj_do_bit17_swizzling = BIT(17); 1559 1560 /* If we don't overwrite a cacheline completely we need to be 1561 * careful to have up-to-date data by first clflushing. Don't 1562 * overcomplicate things and flush the entire patch. 1563 */ 1564 partial_cacheline_write = 0; 1565 if (needs_clflush & CLFLUSH_BEFORE) 1566 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1567 1568 user_data = u64_to_user_ptr(args->data_ptr); 1569 remain = args->size; 1570 offset = offset_in_page(args->offset); 1571 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1572 struct page *page = i915_gem_object_get_page(obj, idx); 1573 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1574 1575 ret = shmem_pwrite(page, offset, length, user_data, 1576 page_to_phys(page) & obj_do_bit17_swizzling, 1577 (offset | length) & partial_cacheline_write, 1578 needs_clflush & CLFLUSH_AFTER); 1579 if (ret) 1580 break; 1581 1582 remain -= length; 1583 user_data += length; 1584 offset = 0; 1585 } 1586 1587 intel_fb_obj_flush(obj, ORIGIN_CPU); 1588 i915_gem_obj_finish_shmem_access(obj); 1589 return ret; 1590 } 1591 1592 /** 1593 * Writes data to the object referenced by handle. 1594 * @dev: drm device 1595 * @data: ioctl data blob 1596 * @file: drm file 1597 * 1598 * On error, the contents of the buffer that were to be modified are undefined. 
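 *
 * The write is serviced by the first backend able to handle it: a
 * per-object ->pwrite() hook if one is provided, then the uncached GTT
 * fast path (for objects without struct pages, or whose CPU writes
 * would need a clflush anyway), and finally the phys or shmem CPU path
 * when the GTT path returns -EFAULT or -ENOSPC.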
1599 */ 1600 int 1601 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1602 struct drm_file *file) 1603 { 1604 struct drm_i915_gem_pwrite *args = data; 1605 struct drm_i915_gem_object *obj; 1606 int ret; 1607 1608 if (args->size == 0) 1609 return 0; 1610 1611 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 1612 return -EFAULT; 1613 1614 obj = i915_gem_object_lookup(file, args->handle); 1615 if (!obj) 1616 return -ENOENT; 1617 1618 /* Bounds check destination. */ 1619 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1620 ret = -EINVAL; 1621 goto err; 1622 } 1623 1624 /* Writes not allowed into this read-only object */ 1625 if (i915_gem_object_is_readonly(obj)) { 1626 ret = -EINVAL; 1627 goto err; 1628 } 1629 1630 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1631 1632 ret = -ENODEV; 1633 if (obj->ops->pwrite) 1634 ret = obj->ops->pwrite(obj, args); 1635 if (ret != -ENODEV) 1636 goto err; 1637 1638 ret = i915_gem_object_wait(obj, 1639 I915_WAIT_INTERRUPTIBLE | 1640 I915_WAIT_ALL, 1641 MAX_SCHEDULE_TIMEOUT, 1642 to_rps_client(file)); 1643 if (ret) 1644 goto err; 1645 1646 ret = i915_gem_object_pin_pages(obj); 1647 if (ret) 1648 goto err; 1649 1650 ret = -EFAULT; 1651 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1652 * it would end up going through the fenced access, and we'll get 1653 * different detiling behavior between reading and writing. 1654 * pread/pwrite currently are reading and writing from the CPU 1655 * perspective, requiring manual detiling by the client. 1656 */ 1657 if (!i915_gem_object_has_struct_page(obj) || 1658 cpu_write_needs_clflush(obj)) 1659 /* Note that the gtt paths might fail with non-page-backed user 1660 * pointers (e.g. gtt mappings when moving data between 1661 * textures). Fallback to the shmem path in that case. 1662 */ 1663 ret = i915_gem_gtt_pwrite_fast(obj, args); 1664 1665 if (ret == -EFAULT || ret == -ENOSPC) { 1666 if (obj->phys_handle) 1667 ret = i915_gem_phys_pwrite(obj, args, file); 1668 else 1669 ret = i915_gem_shmem_pwrite(obj, args); 1670 } 1671 1672 i915_gem_object_unpin_pages(obj); 1673 err: 1674 i915_gem_object_put(obj); 1675 return ret; 1676 } 1677 1678 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1679 { 1680 struct drm_i915_private *i915; 1681 struct list_head *list; 1682 struct i915_vma *vma; 1683 1684 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1685 1686 for_each_ggtt_vma(vma, obj) { 1687 if (i915_vma_is_active(vma)) 1688 continue; 1689 1690 if (!drm_mm_node_allocated(&vma->node)) 1691 continue; 1692 1693 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1694 } 1695 1696 i915 = to_i915(obj->base.dev); 1697 spin_lock(&i915->mm.obj_lock); 1698 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1699 list_move_tail(&obj->mm.link, list); 1700 spin_unlock(&i915->mm.obj_lock); 1701 } 1702 1703 /** 1704 * Called when user space prepares to use an object with the CPU, either 1705 * through the mmap ioctl's mapping or a GTT mapping. 1706 * @dev: drm device 1707 * @data: ioctl data blob 1708 * @file: drm file 1709 */ 1710 int 1711 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1712 struct drm_file *file) 1713 { 1714 struct drm_i915_gem_set_domain *args = data; 1715 struct drm_i915_gem_object *obj; 1716 uint32_t read_domains = args->read_domains; 1717 uint32_t write_domain = args->write_domain; 1718 int err; 1719 1720 /* Only handle setting domains to types used by the CPU. 
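	 * That means I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_GTT or
	 * I915_GEM_DOMAIN_WC; anything in I915_GEM_GPU_DOMAINS is rejected
	 * with -EINVAL below.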
*/ 1721 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1722 return -EINVAL; 1723 1724 /* Having something in the write domain implies it's in the read 1725 * domain, and only that read domain. Enforce that in the request. 1726 */ 1727 if (write_domain != 0 && read_domains != write_domain) 1728 return -EINVAL; 1729 1730 obj = i915_gem_object_lookup(file, args->handle); 1731 if (!obj) 1732 return -ENOENT; 1733 1734 /* Try to flush the object off the GPU without holding the lock. 1735 * We will repeat the flush holding the lock in the normal manner 1736 * to catch cases where we are gazumped. 1737 */ 1738 err = i915_gem_object_wait(obj, 1739 I915_WAIT_INTERRUPTIBLE | 1740 I915_WAIT_PRIORITY | 1741 (write_domain ? I915_WAIT_ALL : 0), 1742 MAX_SCHEDULE_TIMEOUT, 1743 to_rps_client(file)); 1744 if (err) 1745 goto out; 1746 1747 /* 1748 * Proxy objects do not control access to the backing storage, ergo 1749 * they cannot be used as a means to manipulate the cache domain 1750 * tracking for that backing storage. The proxy object is always 1751 * considered to be outside of any cache domain. 1752 */ 1753 if (i915_gem_object_is_proxy(obj)) { 1754 err = -ENXIO; 1755 goto out; 1756 } 1757 1758 /* 1759 * Flush and acquire obj->pages so that we are coherent through 1760 * direct access in memory with previous cached writes through 1761 * shmemfs and that our cache domain tracking remains valid. 1762 * For example, if the obj->filp was moved to swap without us 1763 * being notified and releasing the pages, we would mistakenly 1764 * continue to assume that the obj remained out of the CPU cached 1765 * domain. 1766 */ 1767 err = i915_gem_object_pin_pages(obj); 1768 if (err) 1769 goto out; 1770 1771 err = i915_mutex_lock_interruptible(dev); 1772 if (err) 1773 goto out_unpin; 1774 1775 if (read_domains & I915_GEM_DOMAIN_WC) 1776 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1777 else if (read_domains & I915_GEM_DOMAIN_GTT) 1778 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1779 else 1780 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1781 1782 /* And bump the LRU for this access */ 1783 i915_gem_object_bump_inactive_ggtt(obj); 1784 1785 mutex_unlock(&dev->struct_mutex); 1786 1787 if (write_domain != 0) 1788 intel_fb_obj_invalidate(obj, 1789 fb_write_origin(obj, write_domain)); 1790 1791 out_unpin: 1792 i915_gem_object_unpin_pages(obj); 1793 out: 1794 i915_gem_object_put(obj); 1795 return err; 1796 } 1797 1798 /** 1799 * Called when user space has done writes to this buffer 1800 * @dev: drm device 1801 * @data: ioctl data blob 1802 * @file: drm file 1803 */ 1804 int 1805 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1806 struct drm_file *file) 1807 { 1808 struct drm_i915_gem_sw_finish *args = data; 1809 struct drm_i915_gem_object *obj; 1810 1811 obj = i915_gem_object_lookup(file, args->handle); 1812 if (!obj) 1813 return -ENOENT; 1814 1815 /* 1816 * Proxy objects are barred from CPU access, so there is no 1817 * need to ban sw_finish as it is a nop. 
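	 *
	 * Userspace typically issues this ioctl after it has finished writing
	 * through a CPU mmap; all that requires from the kernel nowadays is
	 * the display flush below.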
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 * it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on; hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
2009 * 2010 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 2011 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 2012 */ 2013 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2014 { 2015 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2016 struct vm_area_struct *area = vmf->vma; 2017 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2018 struct drm_device *dev = obj->base.dev; 2019 struct drm_i915_private *dev_priv = to_i915(dev); 2020 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2021 bool write = area->vm_flags & VM_WRITE; 2022 struct i915_vma *vma; 2023 pgoff_t page_offset; 2024 int ret; 2025 2026 /* Sanity check that we allow writing into this object */ 2027 if (i915_gem_object_is_readonly(obj) && write) 2028 return VM_FAULT_SIGBUS; 2029 2030 /* We don't use vmf->pgoff since that has the fake offset */ 2031 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2032 2033 trace_i915_gem_object_fault(obj, page_offset, true, write); 2034 2035 /* Try to flush the object off the GPU first without holding the lock. 2036 * Upon acquiring the lock, we will perform our sanity checks and then 2037 * repeat the flush holding the lock in the normal manner to catch cases 2038 * where we are gazumped. 2039 */ 2040 ret = i915_gem_object_wait(obj, 2041 I915_WAIT_INTERRUPTIBLE, 2042 MAX_SCHEDULE_TIMEOUT, 2043 NULL); 2044 if (ret) 2045 goto err; 2046 2047 ret = i915_gem_object_pin_pages(obj); 2048 if (ret) 2049 goto err; 2050 2051 intel_runtime_pm_get(dev_priv); 2052 2053 ret = i915_mutex_lock_interruptible(dev); 2054 if (ret) 2055 goto err_rpm; 2056 2057 /* Access to snoopable pages through the GTT is incoherent. */ 2058 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2059 ret = -EFAULT; 2060 goto err_unlock; 2061 } 2062 2063 2064 /* Now pin it into the GTT as needed */ 2065 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2066 PIN_MAPPABLE | 2067 PIN_NONBLOCK | 2068 PIN_NONFAULT); 2069 if (IS_ERR(vma)) { 2070 /* Use a partial view if it is bigger than available space */ 2071 struct i915_ggtt_view view = 2072 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2073 unsigned int flags; 2074 2075 flags = PIN_MAPPABLE; 2076 if (view.type == I915_GGTT_VIEW_NORMAL) 2077 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2078 2079 /* 2080 * Userspace is now writing through an untracked VMA, abandon 2081 * all hope that the hardware is able to track future writes. 
2082 */ 2083 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2084 2085 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2086 if (IS_ERR(vma) && !view.type) { 2087 flags = PIN_MAPPABLE; 2088 view.type = I915_GGTT_VIEW_PARTIAL; 2089 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2090 } 2091 } 2092 if (IS_ERR(vma)) { 2093 ret = PTR_ERR(vma); 2094 goto err_unlock; 2095 } 2096 2097 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2098 if (ret) 2099 goto err_unpin; 2100 2101 ret = i915_vma_pin_fence(vma); 2102 if (ret) 2103 goto err_unpin; 2104 2105 /* Finally, remap it using the new GTT offset */ 2106 ret = remap_io_mapping(area, 2107 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2108 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2109 min_t(u64, vma->size, area->vm_end - area->vm_start), 2110 &ggtt->iomap); 2111 if (ret) 2112 goto err_fence; 2113 2114 /* Mark as being mmapped into userspace for later revocation */ 2115 assert_rpm_wakelock_held(dev_priv); 2116 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2117 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2118 GEM_BUG_ON(!obj->userfault_count); 2119 2120 i915_vma_set_ggtt_write(vma); 2121 2122 err_fence: 2123 i915_vma_unpin_fence(vma); 2124 err_unpin: 2125 __i915_vma_unpin(vma); 2126 err_unlock: 2127 mutex_unlock(&dev->struct_mutex); 2128 err_rpm: 2129 intel_runtime_pm_put(dev_priv); 2130 i915_gem_object_unpin_pages(obj); 2131 err: 2132 switch (ret) { 2133 case -EIO: 2134 /* 2135 * We eat errors when the gpu is terminally wedged to avoid 2136 * userspace unduly crashing (gl has no provisions for mmaps to 2137 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2138 * and so needs to be reported. 2139 */ 2140 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2141 return VM_FAULT_SIGBUS; 2142 /* else: fall through */ 2143 case -EAGAIN: 2144 /* 2145 * EAGAIN means the gpu is hung and we'll wait for the error 2146 * handler to reset everything when re-faulting in 2147 * i915_mutex_lock_interruptible. 2148 */ 2149 case 0: 2150 case -ERESTARTSYS: 2151 case -EINTR: 2152 case -EBUSY: 2153 /* 2154 * EBUSY is ok: this just means that another thread 2155 * already did the job. 2156 */ 2157 return VM_FAULT_NOPAGE; 2158 case -ENOMEM: 2159 return VM_FAULT_OOM; 2160 case -ENOSPC: 2161 case -EFAULT: 2162 return VM_FAULT_SIGBUS; 2163 default: 2164 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2165 return VM_FAULT_SIGBUS; 2166 } 2167 } 2168 2169 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2170 { 2171 struct i915_vma *vma; 2172 2173 GEM_BUG_ON(!obj->userfault_count); 2174 2175 obj->userfault_count = 0; 2176 list_del(&obj->userfault_link); 2177 drm_vma_node_unmap(&obj->base.vma_node, 2178 obj->base.dev->anon_inode->i_mapping); 2179 2180 for_each_ggtt_vma(vma, obj) 2181 i915_vma_unset_userfault(vma); 2182 } 2183 2184 /** 2185 * i915_gem_release_mmap - remove physical page mappings 2186 * @obj: obj in question 2187 * 2188 * Preserve the reservation of the mmapping with the DRM core code, but 2189 * relinquish ownership of the pages back to the system. 2190 * 2191 * It is vital that we remove the page mapping if we have mapped a tiled 2192 * object through the GTT and then lose the fence register due to 2193 * resource pressure. Similarly if the object has been moved out of the 2194 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2195 * mapping will then trigger a page fault on the next user access, allowing 2196 * fixup by i915_gem_fault(). 2197 */ 2198 void 2199 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2200 { 2201 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2202 2203 /* Serialisation between user GTT access and our code depends upon 2204 * revoking the CPU's PTE whilst the mutex is held. The next user 2205 * pagefault then has to wait until we release the mutex. 2206 * 2207 * Note that RPM complicates somewhat by adding an additional 2208 * requirement that operations to the GGTT be made holding the RPM 2209 * wakeref. 2210 */ 2211 lockdep_assert_held(&i915->drm.struct_mutex); 2212 intel_runtime_pm_get(i915); 2213 2214 if (!obj->userfault_count) 2215 goto out; 2216 2217 __i915_gem_object_release_mmap(obj); 2218 2219 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2220 * memory transactions from userspace before we return. The TLB 2221 * flushing implied above by changing the PTE above *should* be 2222 * sufficient, an extra barrier here just provides us with a bit 2223 * of paranoid documentation about our requirement to serialise 2224 * memory writes before touching registers / GSM. 2225 */ 2226 wmb(); 2227 2228 out: 2229 intel_runtime_pm_put(i915); 2230 } 2231 2232 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2233 { 2234 struct drm_i915_gem_object *obj, *on; 2235 int i; 2236 2237 /* 2238 * Only called during RPM suspend. All users of the userfault_list 2239 * must be holding an RPM wakeref to ensure that this can not 2240 * run concurrently with themselves (and use the struct_mutex for 2241 * protection between themselves). 2242 */ 2243 2244 list_for_each_entry_safe(obj, on, 2245 &dev_priv->mm.userfault_list, userfault_link) 2246 __i915_gem_object_release_mmap(obj); 2247 2248 /* The fence will be lost when the device powers down. If any were 2249 * in use by hardware (i.e. they are pinned), we should not be powering 2250 * down! All other fences will be reacquired by the user upon waking. 2251 */ 2252 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2253 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2254 2255 /* Ideally we want to assert that the fence register is not 2256 * live at this point (i.e. that no piece of code will be 2257 * trying to write through fence + GTT, as that both violates 2258 * our tracking of activity and associated locking/barriers, 2259 * but also is illegal given that the hw is powered down). 2260 * 2261 * Previously we used reg->pin_count as a "liveness" indicator. 2262 * That is not sufficient, and we need a more fine-grained 2263 * tool if we want to have a sanity check here. 
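		 *
		 * For now we settle for the checks visible below: assert that
		 * no bound vma still has userspace faults outstanding, and
		 * mark the register dirty so that the fence is written back
		 * before it is next used.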
2264 */ 2265 2266 if (!reg->vma) 2267 continue; 2268 2269 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2270 reg->dirty = true; 2271 } 2272 } 2273 2274 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2275 { 2276 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2277 int err; 2278 2279 err = drm_gem_create_mmap_offset(&obj->base); 2280 if (likely(!err)) 2281 return 0; 2282 2283 /* Attempt to reap some mmap space from dead objects */ 2284 do { 2285 err = i915_gem_wait_for_idle(dev_priv, 2286 I915_WAIT_INTERRUPTIBLE, 2287 MAX_SCHEDULE_TIMEOUT); 2288 if (err) 2289 break; 2290 2291 i915_gem_drain_freed_objects(dev_priv); 2292 err = drm_gem_create_mmap_offset(&obj->base); 2293 if (!err) 2294 break; 2295 2296 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2297 2298 return err; 2299 } 2300 2301 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2302 { 2303 drm_gem_free_mmap_offset(&obj->base); 2304 } 2305 2306 int 2307 i915_gem_mmap_gtt(struct drm_file *file, 2308 struct drm_device *dev, 2309 uint32_t handle, 2310 uint64_t *offset) 2311 { 2312 struct drm_i915_gem_object *obj; 2313 int ret; 2314 2315 obj = i915_gem_object_lookup(file, handle); 2316 if (!obj) 2317 return -ENOENT; 2318 2319 ret = i915_gem_object_create_mmap_offset(obj); 2320 if (ret == 0) 2321 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2322 2323 i915_gem_object_put(obj); 2324 return ret; 2325 } 2326 2327 /** 2328 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2329 * @dev: DRM device 2330 * @data: GTT mapping ioctl data 2331 * @file: GEM object info 2332 * 2333 * Simply returns the fake offset to userspace so it can mmap it. 2334 * The mmap call will end up in drm_gem_mmap(), which will set things 2335 * up so we can get faults in the handler above. 2336 * 2337 * The fault handler will take care of binding the object into the GTT 2338 * (since it may have been evicted to make room for something), allocating 2339 * a fence register, and mapping the appropriate aperture address into 2340 * userspace. 2341 */ 2342 int 2343 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2344 struct drm_file *file) 2345 { 2346 struct drm_i915_gem_mmap_gtt *args = data; 2347 2348 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2349 } 2350 2351 /* Immediately discard the backing storage */ 2352 static void 2353 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2354 { 2355 i915_gem_object_free_mmap_offset(obj); 2356 2357 if (obj->base.filp == NULL) 2358 return; 2359 2360 /* Our goal here is to return as much of the memory as 2361 * is possible back to the system as we are called from OOM. 2362 * To do this we must instruct the shmfs to drop all of its 2363 * backing pages, *now*. 
2364 */ 2365 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2366 obj->mm.madv = __I915_MADV_PURGED; 2367 obj->mm.pages = ERR_PTR(-EFAULT); 2368 } 2369 2370 /* Try to discard unwanted pages */ 2371 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2372 { 2373 struct address_space *mapping; 2374 2375 lockdep_assert_held(&obj->mm.lock); 2376 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2377 2378 switch (obj->mm.madv) { 2379 case I915_MADV_DONTNEED: 2380 i915_gem_object_truncate(obj); 2381 case __I915_MADV_PURGED: 2382 return; 2383 } 2384 2385 if (obj->base.filp == NULL) 2386 return; 2387 2388 mapping = obj->base.filp->f_mapping, 2389 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2390 } 2391 2392 /* 2393 * Move pages to appropriate lru and release the pagevec, decrementing the 2394 * ref count of those pages. 2395 */ 2396 static void check_release_pagevec(struct pagevec *pvec) 2397 { 2398 check_move_unevictable_pages(pvec); 2399 __pagevec_release(pvec); 2400 cond_resched(); 2401 } 2402 2403 static void 2404 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2405 struct sg_table *pages) 2406 { 2407 struct sgt_iter sgt_iter; 2408 struct pagevec pvec; 2409 struct page *page; 2410 2411 __i915_gem_object_release_shmem(obj, pages, true); 2412 2413 i915_gem_gtt_finish_pages(obj, pages); 2414 2415 if (i915_gem_object_needs_bit17_swizzle(obj)) 2416 i915_gem_object_save_bit_17_swizzle(obj, pages); 2417 2418 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2419 2420 pagevec_init(&pvec); 2421 for_each_sgt_page(page, sgt_iter, pages) { 2422 if (obj->mm.dirty) 2423 set_page_dirty(page); 2424 2425 if (obj->mm.madv == I915_MADV_WILLNEED) 2426 mark_page_accessed(page); 2427 2428 if (!pagevec_add(&pvec, page)) 2429 check_release_pagevec(&pvec); 2430 } 2431 if (pagevec_count(&pvec)) 2432 check_release_pagevec(&pvec); 2433 obj->mm.dirty = false; 2434 2435 sg_free_table(pages); 2436 kfree(pages); 2437 } 2438 2439 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2440 { 2441 struct radix_tree_iter iter; 2442 void __rcu **slot; 2443 2444 rcu_read_lock(); 2445 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2446 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2447 rcu_read_unlock(); 2448 } 2449 2450 static struct sg_table * 2451 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2452 { 2453 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2454 struct sg_table *pages; 2455 2456 pages = fetch_and_zero(&obj->mm.pages); 2457 if (!pages) 2458 return NULL; 2459 2460 spin_lock(&i915->mm.obj_lock); 2461 list_del(&obj->mm.link); 2462 spin_unlock(&i915->mm.obj_lock); 2463 2464 if (obj->mm.mapping) { 2465 void *ptr; 2466 2467 ptr = page_mask_bits(obj->mm.mapping); 2468 if (is_vmalloc_addr(ptr)) 2469 vunmap(ptr); 2470 else 2471 kunmap(kmap_to_page(ptr)); 2472 2473 obj->mm.mapping = NULL; 2474 } 2475 2476 __i915_gem_object_reset_page_iter(obj); 2477 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2478 2479 return pages; 2480 } 2481 2482 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2483 enum i915_mm_subclass subclass) 2484 { 2485 struct sg_table *pages; 2486 2487 if (i915_gem_object_has_pinned_pages(obj)) 2488 return; 2489 2490 GEM_BUG_ON(obj->bind_count); 2491 if (!i915_gem_object_has_pages(obj)) 2492 return; 2493 2494 /* May be called by shrinker from within get_pages() (on another bo) */ 2495 mutex_lock_nested(&obj->mm.lock, subclass); 2496 if 
(unlikely(atomic_read(&obj->mm.pages_pin_count))) 2497 goto unlock; 2498 2499 /* 2500 * ->put_pages might need to allocate memory for the bit17 swizzle 2501 * array, hence protect them from being reaped by removing them from gtt 2502 * lists early. 2503 */ 2504 pages = __i915_gem_object_unset_pages(obj); 2505 if (!IS_ERR(pages)) 2506 obj->ops->put_pages(obj, pages); 2507 2508 unlock: 2509 mutex_unlock(&obj->mm.lock); 2510 } 2511 2512 bool i915_sg_trim(struct sg_table *orig_st) 2513 { 2514 struct sg_table new_st; 2515 struct scatterlist *sg, *new_sg; 2516 unsigned int i; 2517 2518 if (orig_st->nents == orig_st->orig_nents) 2519 return false; 2520 2521 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2522 return false; 2523 2524 new_sg = new_st.sgl; 2525 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2526 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2527 sg_dma_address(new_sg) = sg_dma_address(sg); 2528 sg_dma_len(new_sg) = sg_dma_len(sg); 2529 2530 new_sg = sg_next(new_sg); 2531 } 2532 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2533 2534 sg_free_table(orig_st); 2535 2536 *orig_st = new_st; 2537 return true; 2538 } 2539 2540 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2541 { 2542 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2543 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2544 unsigned long i; 2545 struct address_space *mapping; 2546 struct sg_table *st; 2547 struct scatterlist *sg; 2548 struct sgt_iter sgt_iter; 2549 struct page *page; 2550 unsigned long last_pfn = 0; /* suppress gcc warning */ 2551 unsigned int max_segment = i915_sg_segment_size(); 2552 unsigned int sg_page_sizes; 2553 struct pagevec pvec; 2554 gfp_t noreclaim; 2555 int ret; 2556 2557 /* 2558 * Assert that the object is not currently in any GPU domain. As it 2559 * wasn't in the GTT, there shouldn't be any way it could have been in 2560 * a GPU cache 2561 */ 2562 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2563 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2564 2565 /* 2566 * If there's no chance of allocating enough pages for the whole 2567 * object, bail early. 2568 */ 2569 if (page_count > totalram_pages()) 2570 return -ENOMEM; 2571 2572 st = kmalloc(sizeof(*st), GFP_KERNEL); 2573 if (st == NULL) 2574 return -ENOMEM; 2575 2576 rebuild_st: 2577 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2578 kfree(st); 2579 return -ENOMEM; 2580 } 2581 2582 /* 2583 * Get the list of pages out of our struct file. They'll be pinned 2584 * at this point until we release them. 
2585 * 2586 * Fail silently without starting the shrinker 2587 */ 2588 mapping = obj->base.filp->f_mapping; 2589 mapping_set_unevictable(mapping); 2590 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2591 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2592 2593 sg = st->sgl; 2594 st->nents = 0; 2595 sg_page_sizes = 0; 2596 for (i = 0; i < page_count; i++) { 2597 const unsigned int shrink[] = { 2598 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2599 0, 2600 }, *s = shrink; 2601 gfp_t gfp = noreclaim; 2602 2603 do { 2604 cond_resched(); 2605 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2606 if (likely(!IS_ERR(page))) 2607 break; 2608 2609 if (!*s) { 2610 ret = PTR_ERR(page); 2611 goto err_sg; 2612 } 2613 2614 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2615 2616 /* 2617 * We've tried hard to allocate the memory by reaping 2618 * our own buffer, now let the real VM do its job and 2619 * go down in flames if truly OOM. 2620 * 2621 * However, since graphics tend to be disposable, 2622 * defer the oom here by reporting the ENOMEM back 2623 * to userspace. 2624 */ 2625 if (!*s) { 2626 /* reclaim and warn, but no oom */ 2627 gfp = mapping_gfp_mask(mapping); 2628 2629 /* 2630 * Our bo are always dirty and so we require 2631 * kswapd to reclaim our pages (direct reclaim 2632 * does not effectively begin pageout of our 2633 * buffers on its own). However, direct reclaim 2634 * only waits for kswapd when under allocation 2635 * congestion. So as a result __GFP_RECLAIM is 2636 * unreliable and fails to actually reclaim our 2637 * dirty pages -- unless you try over and over 2638 * again with !__GFP_NORETRY. However, we still 2639 * want to fail this allocation rather than 2640 * trigger the out-of-memory killer and for 2641 * this we want __GFP_RETRY_MAYFAIL. 2642 */ 2643 gfp |= __GFP_RETRY_MAYFAIL; 2644 } 2645 } while (1); 2646 2647 if (!i || 2648 sg->length >= max_segment || 2649 page_to_pfn(page) != last_pfn + 1) { 2650 if (i) { 2651 sg_page_sizes |= sg->length; 2652 sg = sg_next(sg); 2653 } 2654 st->nents++; 2655 sg_set_page(sg, page, PAGE_SIZE, 0); 2656 } else { 2657 sg->length += PAGE_SIZE; 2658 } 2659 last_pfn = page_to_pfn(page); 2660 2661 /* Check that the i965g/gm workaround works. */ 2662 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2663 } 2664 if (sg) { /* loop terminated early; short sg table */ 2665 sg_page_sizes |= sg->length; 2666 sg_mark_end(sg); 2667 } 2668 2669 /* Trim unused sg entries to avoid wasting memory. */ 2670 i915_sg_trim(st); 2671 2672 ret = i915_gem_gtt_prepare_pages(obj, st); 2673 if (ret) { 2674 /* 2675 * DMA remapping failed? One possible cause is that 2676 * it could not reserve enough large entries, asking 2677 * for PAGE_SIZE chunks instead may be helpful. 
2678 */ 2679 if (max_segment > PAGE_SIZE) { 2680 for_each_sgt_page(page, sgt_iter, st) 2681 put_page(page); 2682 sg_free_table(st); 2683 2684 max_segment = PAGE_SIZE; 2685 goto rebuild_st; 2686 } else { 2687 dev_warn(&dev_priv->drm.pdev->dev, 2688 "Failed to DMA remap %lu pages\n", 2689 page_count); 2690 goto err_pages; 2691 } 2692 } 2693 2694 if (i915_gem_object_needs_bit17_swizzle(obj)) 2695 i915_gem_object_do_bit_17_swizzle(obj, st); 2696 2697 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2698 2699 return 0; 2700 2701 err_sg: 2702 sg_mark_end(sg); 2703 err_pages: 2704 mapping_clear_unevictable(mapping); 2705 pagevec_init(&pvec); 2706 for_each_sgt_page(page, sgt_iter, st) { 2707 if (!pagevec_add(&pvec, page)) 2708 check_release_pagevec(&pvec); 2709 } 2710 if (pagevec_count(&pvec)) 2711 check_release_pagevec(&pvec); 2712 sg_free_table(st); 2713 kfree(st); 2714 2715 /* 2716 * shmemfs first checks if there is enough memory to allocate the page 2717 * and reports ENOSPC should there be insufficient, along with the usual 2718 * ENOMEM for a genuine allocation failure. 2719 * 2720 * We use ENOSPC in our driver to mean that we have run out of aperture 2721 * space and so want to translate the error from shmemfs back to our 2722 * usual understanding of ENOMEM. 2723 */ 2724 if (ret == -ENOSPC) 2725 ret = -ENOMEM; 2726 2727 return ret; 2728 } 2729 2730 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2731 struct sg_table *pages, 2732 unsigned int sg_page_sizes) 2733 { 2734 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2735 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2736 int i; 2737 2738 lockdep_assert_held(&obj->mm.lock); 2739 2740 obj->mm.get_page.sg_pos = pages->sgl; 2741 obj->mm.get_page.sg_idx = 0; 2742 2743 obj->mm.pages = pages; 2744 2745 if (i915_gem_object_is_tiled(obj) && 2746 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2747 GEM_BUG_ON(obj->mm.quirked); 2748 __i915_gem_object_pin_pages(obj); 2749 obj->mm.quirked = true; 2750 } 2751 2752 GEM_BUG_ON(!sg_page_sizes); 2753 obj->mm.page_sizes.phys = sg_page_sizes; 2754 2755 /* 2756 * Calculate the supported page-sizes which fit into the given 2757 * sg_page_sizes. This will give us the page-sizes which we may be able 2758 * to use opportunistically when later inserting into the GTT. For 2759 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2760 * 64K or 4K pages, although in practice this will depend on a number of 2761 * other factors. 2762 */ 2763 obj->mm.page_sizes.sg = 0; 2764 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2765 if (obj->mm.page_sizes.phys & ~0u << i) 2766 obj->mm.page_sizes.sg |= BIT(i); 2767 } 2768 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2769 2770 spin_lock(&i915->mm.obj_lock); 2771 list_add(&obj->mm.link, &i915->mm.unbound_list); 2772 spin_unlock(&i915->mm.obj_lock); 2773 } 2774 2775 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2776 { 2777 int err; 2778 2779 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2780 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2781 return -EFAULT; 2782 } 2783 2784 err = obj->ops->get_pages(obj); 2785 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2786 2787 return err; 2788 } 2789 2790 /* Ensure that the associated pages are gathered from the backing storage 2791 * and pinned into our object. 
i915_gem_object_pin_pages() may be called 2792 * multiple times before they are released by a single call to 2793 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2794 * either as a result of memory pressure (reaping pages under the shrinker) 2795 * or as the object is itself released. 2796 */ 2797 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2798 { 2799 int err; 2800 2801 err = mutex_lock_interruptible(&obj->mm.lock); 2802 if (err) 2803 return err; 2804 2805 if (unlikely(!i915_gem_object_has_pages(obj))) { 2806 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2807 2808 err = ____i915_gem_object_get_pages(obj); 2809 if (err) 2810 goto unlock; 2811 2812 smp_mb__before_atomic(); 2813 } 2814 atomic_inc(&obj->mm.pages_pin_count); 2815 2816 unlock: 2817 mutex_unlock(&obj->mm.lock); 2818 return err; 2819 } 2820 2821 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2822 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2823 enum i915_map_type type) 2824 { 2825 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2826 struct sg_table *sgt = obj->mm.pages; 2827 struct sgt_iter sgt_iter; 2828 struct page *page; 2829 struct page *stack_pages[32]; 2830 struct page **pages = stack_pages; 2831 unsigned long i = 0; 2832 pgprot_t pgprot; 2833 void *addr; 2834 2835 /* A single page can always be kmapped */ 2836 if (n_pages == 1 && type == I915_MAP_WB) 2837 return kmap(sg_page(sgt->sgl)); 2838 2839 if (n_pages > ARRAY_SIZE(stack_pages)) { 2840 /* Too big for stack -- allocate temporary array instead */ 2841 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2842 if (!pages) 2843 return NULL; 2844 } 2845 2846 for_each_sgt_page(page, sgt_iter, sgt) 2847 pages[i++] = page; 2848 2849 /* Check that we have the expected number of pages */ 2850 GEM_BUG_ON(i != n_pages); 2851 2852 switch (type) { 2853 default: 2854 MISSING_CASE(type); 2855 /* fallthrough to use PAGE_KERNEL anyway */ 2856 case I915_MAP_WB: 2857 pgprot = PAGE_KERNEL; 2858 break; 2859 case I915_MAP_WC: 2860 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2861 break; 2862 } 2863 addr = vmap(pages, n_pages, 0, pgprot); 2864 2865 if (pages != stack_pages) 2866 kvfree(pages); 2867 2868 return addr; 2869 } 2870 2871 /* get, pin, and map the pages of the object into kernel space */ 2872 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2873 enum i915_map_type type) 2874 { 2875 enum i915_map_type has_type; 2876 bool pinned; 2877 void *ptr; 2878 int ret; 2879 2880 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2881 return ERR_PTR(-ENXIO); 2882 2883 ret = mutex_lock_interruptible(&obj->mm.lock); 2884 if (ret) 2885 return ERR_PTR(ret); 2886 2887 pinned = !(type & I915_MAP_OVERRIDE); 2888 type &= ~I915_MAP_OVERRIDE; 2889 2890 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2891 if (unlikely(!i915_gem_object_has_pages(obj))) { 2892 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2893 2894 ret = ____i915_gem_object_get_pages(obj); 2895 if (ret) 2896 goto err_unlock; 2897 2898 smp_mb__before_atomic(); 2899 } 2900 atomic_inc(&obj->mm.pages_pin_count); 2901 pinned = false; 2902 } 2903 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2904 2905 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2906 if (ptr && has_type != type) { 2907 if (pinned) { 2908 ret = -EBUSY; 2909 goto err_unpin; 2910 } 2911 2912 if (is_vmalloc_addr(ptr)) 2913 vunmap(ptr); 2914 else 2915 kunmap(kmap_to_page(ptr)); 2916 2917 ptr = obj->mm.mapping = NULL; 2918 } 2919 2920 if (!ptr) { 
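		/*
		 * No usable cached mapping: build a fresh one (kmap() for a
		 * single WB page, vmap() otherwise) and stash it, tagged with
		 * its mapping type, in obj->mm.mapping for later reuse.
		 */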
2921 ptr = i915_gem_object_map(obj, type); 2922 if (!ptr) { 2923 ret = -ENOMEM; 2924 goto err_unpin; 2925 } 2926 2927 obj->mm.mapping = page_pack_bits(ptr, type); 2928 } 2929 2930 out_unlock: 2931 mutex_unlock(&obj->mm.lock); 2932 return ptr; 2933 2934 err_unpin: 2935 atomic_dec(&obj->mm.pages_pin_count); 2936 err_unlock: 2937 ptr = ERR_PTR(ret); 2938 goto out_unlock; 2939 } 2940 2941 static int 2942 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2943 const struct drm_i915_gem_pwrite *arg) 2944 { 2945 struct address_space *mapping = obj->base.filp->f_mapping; 2946 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2947 u64 remain, offset; 2948 unsigned int pg; 2949 2950 /* Before we instantiate/pin the backing store for our use, we 2951 * can prepopulate the shmemfs filp efficiently using a write into 2952 * the pagecache. We avoid the penalty of instantiating all the 2953 * pages, important if the user is just writing to a few and never 2954 * uses the object on the GPU, and using a direct write into shmemfs 2955 * allows it to avoid the cost of retrieving a page (either swapin 2956 * or clearing-before-use) before it is overwritten. 2957 */ 2958 if (i915_gem_object_has_pages(obj)) 2959 return -ENODEV; 2960 2961 if (obj->mm.madv != I915_MADV_WILLNEED) 2962 return -EFAULT; 2963 2964 /* Before the pages are instantiated the object is treated as being 2965 * in the CPU domain. The pages will be clflushed as required before 2966 * use, and we can freely write into the pages directly. If userspace 2967 * races pwrite with any other operation; corruption will ensue - 2968 * that is userspace's prerogative! 2969 */ 2970 2971 remain = arg->size; 2972 offset = arg->offset; 2973 pg = offset_in_page(offset); 2974 2975 do { 2976 unsigned int len, unwritten; 2977 struct page *page; 2978 void *data, *vaddr; 2979 int err; 2980 2981 len = PAGE_SIZE - pg; 2982 if (len > remain) 2983 len = remain; 2984 2985 err = pagecache_write_begin(obj->base.filp, mapping, 2986 offset, len, 0, 2987 &page, &data); 2988 if (err < 0) 2989 return err; 2990 2991 vaddr = kmap(page); 2992 unwritten = copy_from_user(vaddr + pg, user_data, len); 2993 kunmap(page); 2994 2995 err = pagecache_write_end(obj->base.filp, mapping, 2996 offset, len, len - unwritten, 2997 page, data); 2998 if (err < 0) 2999 return err; 3000 3001 if (unwritten) 3002 return -EFAULT; 3003 3004 remain -= len; 3005 user_data += len; 3006 offset += len; 3007 pg = 0; 3008 } while (remain); 3009 3010 return 0; 3011 } 3012 3013 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 3014 const struct i915_gem_context *ctx) 3015 { 3016 unsigned int score; 3017 unsigned long prev_hang; 3018 3019 if (i915_gem_context_is_banned(ctx)) 3020 score = I915_CLIENT_SCORE_CONTEXT_BAN; 3021 else 3022 score = 0; 3023 3024 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 3025 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 3026 score += I915_CLIENT_SCORE_HANG_FAST; 3027 3028 if (score) { 3029 atomic_add(score, &file_priv->ban_score); 3030 3031 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 3032 ctx->name, score, 3033 atomic_read(&file_priv->ban_score)); 3034 } 3035 } 3036 3037 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3038 { 3039 unsigned int score; 3040 bool banned, bannable; 3041 3042 atomic_inc(&ctx->guilty_count); 3043 3044 bannable = i915_gem_context_is_bannable(ctx); 3045 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3046 banned = score >= 
CONTEXT_SCORE_BAN_THRESHOLD; 3047 3048 /* Cool contexts don't accumulate client ban score */ 3049 if (!bannable) 3050 return; 3051 3052 if (banned) { 3053 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3054 ctx->name, atomic_read(&ctx->guilty_count), 3055 score); 3056 i915_gem_context_set_banned(ctx); 3057 } 3058 3059 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3060 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3061 } 3062 3063 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3064 { 3065 atomic_inc(&ctx->active_count); 3066 } 3067 3068 struct i915_request * 3069 i915_gem_find_active_request(struct intel_engine_cs *engine) 3070 { 3071 struct i915_request *request, *active = NULL; 3072 unsigned long flags; 3073 3074 /* 3075 * We are called by the error capture, reset and to dump engine 3076 * state at random points in time. In particular, note that neither is 3077 * crucially ordered with an interrupt. After a hang, the GPU is dead 3078 * and we assume that no more writes can happen (we waited long enough 3079 * for all writes that were in transaction to be flushed) - adding an 3080 * extra delay for a recent interrupt is pointless. Hence, we do 3081 * not need an engine->irq_seqno_barrier() before the seqno reads. 3082 * At all other times, we must assume the GPU is still running, but 3083 * we only care about the snapshot of this moment. 3084 */ 3085 spin_lock_irqsave(&engine->timeline.lock, flags); 3086 list_for_each_entry(request, &engine->timeline.requests, link) { 3087 if (__i915_request_completed(request, request->global_seqno)) 3088 continue; 3089 3090 active = request; 3091 break; 3092 } 3093 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3094 3095 return active; 3096 } 3097 3098 /* 3099 * Ensure irq handler finishes, and not run again. 3100 * Also return the active request so that we only search for it once. 3101 */ 3102 struct i915_request * 3103 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3104 { 3105 struct i915_request *request; 3106 3107 /* 3108 * During the reset sequence, we must prevent the engine from 3109 * entering RC6. As the context state is undefined until we restart 3110 * the engine, if it does enter RC6 during the reset, the state 3111 * written to the powercontext is undefined and so we may lose 3112 * GPU state upon resume, i.e. fail to restart after a reset. 3113 */ 3114 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3115 3116 request = engine->reset.prepare(engine); 3117 if (request && request->fence.error == -EIO) 3118 request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ 3119 3120 return request; 3121 } 3122 3123 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3124 { 3125 struct intel_engine_cs *engine; 3126 struct i915_request *request; 3127 enum intel_engine_id id; 3128 int err = 0; 3129 3130 for_each_engine(engine, dev_priv, id) { 3131 request = i915_gem_reset_prepare_engine(engine); 3132 if (IS_ERR(request)) { 3133 err = PTR_ERR(request); 3134 continue; 3135 } 3136 3137 engine->hangcheck.active_request = request; 3138 } 3139 3140 i915_gem_revoke_fences(dev_priv); 3141 intel_uc_sanitize(dev_priv); 3142 3143 return err; 3144 } 3145 3146 static void engine_skip_context(struct i915_request *request) 3147 { 3148 struct intel_engine_cs *engine = request->engine; 3149 struct i915_gem_context *hung_ctx = request->gem_context; 3150 struct i915_timeline *timeline = request->timeline; 3151 unsigned long flags; 3152 3153 GEM_BUG_ON(timeline == &engine->timeline); 3154 3155 spin_lock_irqsave(&engine->timeline.lock, flags); 3156 spin_lock(&timeline->lock); 3157 3158 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3159 if (request->gem_context == hung_ctx) 3160 i915_request_skip(request, -EIO); 3161 3162 list_for_each_entry(request, &timeline->requests, link) 3163 i915_request_skip(request, -EIO); 3164 3165 spin_unlock(&timeline->lock); 3166 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3167 } 3168 3169 /* Returns the request if it was guilty of the hang */ 3170 static struct i915_request * 3171 i915_gem_reset_request(struct intel_engine_cs *engine, 3172 struct i915_request *request, 3173 bool stalled) 3174 { 3175 /* The guilty request will get skipped on a hung engine. 3176 * 3177 * Users of client default contexts do not rely on logical 3178 * state preserved between batches so it is safe to execute 3179 * queued requests following the hang. Non default contexts 3180 * rely on preserved state, so skipping a batch loses the 3181 * evolution of the state and it needs to be considered corrupted. 3182 * Executing more queued batches on top of corrupted state is 3183 * risky. But we take the risk by trying to advance through 3184 * the queued requests in order to make the client behaviour 3185 * more predictable around resets, by not throwing away random 3186 * amount of batches it has prepared for execution. Sophisticated 3187 * clients can use gem_reset_stats_ioctl and dma fence status 3188 * (exported via sync_file info ioctl on explicit fences) to observe 3189 * when it loses the context state and should rebuild accordingly. 3190 * 3191 * The context ban, and ultimately the client ban, mechanism are safety 3192 * valves if client submission ends up resulting in nothing more than 3193 * subsequent hangs. 3194 */ 3195 3196 if (i915_request_completed(request)) { 3197 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3198 engine->name, request->global_seqno, 3199 request->fence.context, request->fence.seqno, 3200 intel_engine_get_seqno(engine)); 3201 stalled = false; 3202 } 3203 3204 if (stalled) { 3205 i915_gem_context_mark_guilty(request->gem_context); 3206 i915_request_skip(request, -EIO); 3207 3208 /* If this context is now banned, skip all pending requests. */ 3209 if (i915_gem_context_is_banned(request->gem_context)) 3210 engine_skip_context(request); 3211 } else { 3212 /* 3213 * Since this is not the hung engine, it may have advanced 3214 * since the hang declaration. Double check by refinding 3215 * the active request at the time of the reset. 
3216 */ 3217 request = i915_gem_find_active_request(engine); 3218 if (request) { 3219 unsigned long flags; 3220 3221 i915_gem_context_mark_innocent(request->gem_context); 3222 dma_fence_set_error(&request->fence, -EAGAIN); 3223 3224 /* Rewind the engine to replay the incomplete rq */ 3225 spin_lock_irqsave(&engine->timeline.lock, flags); 3226 request = list_prev_entry(request, link); 3227 if (&request->link == &engine->timeline.requests) 3228 request = NULL; 3229 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3230 } 3231 } 3232 3233 return request; 3234 } 3235 3236 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3237 struct i915_request *request, 3238 bool stalled) 3239 { 3240 /* 3241 * Make sure this write is visible before we re-enable the interrupt 3242 * handlers on another CPU, as tasklet_enable() resolves to just 3243 * a compiler barrier which is insufficient for our purpose here. 3244 */ 3245 smp_store_mb(engine->irq_posted, 0); 3246 3247 if (request) 3248 request = i915_gem_reset_request(engine, request, stalled); 3249 3250 /* Setup the CS to resume from the breadcrumb of the hung request */ 3251 engine->reset.reset(engine, request); 3252 } 3253 3254 void i915_gem_reset(struct drm_i915_private *dev_priv, 3255 unsigned int stalled_mask) 3256 { 3257 struct intel_engine_cs *engine; 3258 enum intel_engine_id id; 3259 3260 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3261 3262 i915_retire_requests(dev_priv); 3263 3264 for_each_engine(engine, dev_priv, id) { 3265 struct intel_context *ce; 3266 3267 i915_gem_reset_engine(engine, 3268 engine->hangcheck.active_request, 3269 stalled_mask & ENGINE_MASK(id)); 3270 ce = fetch_and_zero(&engine->last_retired_context); 3271 if (ce) 3272 intel_context_unpin(ce); 3273 3274 /* 3275 * Ostensibily, we always want a context loaded for powersaving, 3276 * so if the engine is idle after the reset, send a request 3277 * to load our scratch kernel_context. 3278 * 3279 * More mysteriously, if we leave the engine idle after a reset, 3280 * the next userspace batch may hang, with what appears to be 3281 * an incoherent read by the CS (presumably stale TLB). An 3282 * empty request appears sufficient to paper over the glitch. 
3283 */ 3284 if (intel_engine_is_idle(engine)) { 3285 struct i915_request *rq; 3286 3287 rq = i915_request_alloc(engine, 3288 dev_priv->kernel_context); 3289 if (!IS_ERR(rq)) 3290 i915_request_add(rq); 3291 } 3292 } 3293 3294 i915_gem_restore_fences(dev_priv); 3295 } 3296 3297 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3298 { 3299 engine->reset.finish(engine); 3300 3301 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3302 } 3303 3304 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3305 { 3306 struct intel_engine_cs *engine; 3307 enum intel_engine_id id; 3308 3309 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3310 3311 for_each_engine(engine, dev_priv, id) { 3312 engine->hangcheck.active_request = NULL; 3313 i915_gem_reset_finish_engine(engine); 3314 } 3315 } 3316 3317 static void nop_submit_request(struct i915_request *request) 3318 { 3319 unsigned long flags; 3320 3321 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3322 request->engine->name, 3323 request->fence.context, request->fence.seqno); 3324 dma_fence_set_error(&request->fence, -EIO); 3325 3326 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3327 __i915_request_submit(request); 3328 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3329 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3330 } 3331 3332 void i915_gem_set_wedged(struct drm_i915_private *i915) 3333 { 3334 struct intel_engine_cs *engine; 3335 enum intel_engine_id id; 3336 3337 GEM_TRACE("start\n"); 3338 3339 if (GEM_SHOW_DEBUG()) { 3340 struct drm_printer p = drm_debug_printer(__func__); 3341 3342 for_each_engine(engine, i915, id) 3343 intel_engine_dump(engine, &p, "%s\n", engine->name); 3344 } 3345 3346 if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) 3347 goto out; 3348 3349 /* 3350 * First, stop submission to hw, but do not yet complete requests by 3351 * rolling the global seqno forward (since this would complete requests 3352 * for which we haven't set the fence error to EIO yet). 3353 */ 3354 for_each_engine(engine, i915, id) 3355 i915_gem_reset_prepare_engine(engine); 3356 3357 /* Even if the GPU reset fails, it should still stop the engines */ 3358 if (INTEL_GEN(i915) >= 5) 3359 intel_gpu_reset(i915, ALL_ENGINES); 3360 3361 for_each_engine(engine, i915, id) { 3362 engine->submit_request = nop_submit_request; 3363 engine->schedule = NULL; 3364 } 3365 i915->caps.scheduler = 0; 3366 3367 /* 3368 * Make sure no request can slip through without getting completed by 3369 * either this call here to intel_engine_init_global_seqno, or the one 3370 * in nop_submit_request. 3371 */ 3372 synchronize_rcu(); 3373 3374 /* Mark all executing requests as skipped */ 3375 for_each_engine(engine, i915, id) 3376 engine->cancel_requests(engine); 3377 3378 for_each_engine(engine, i915, id) { 3379 i915_gem_reset_finish_engine(engine); 3380 intel_engine_wakeup(engine); 3381 } 3382 3383 out: 3384 GEM_TRACE("end\n"); 3385 3386 wake_up_all(&i915->gpu_error.reset_queue); 3387 } 3388 3389 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3390 { 3391 struct i915_timeline *tl; 3392 3393 lockdep_assert_held(&i915->drm.struct_mutex); 3394 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3395 return true; 3396 3397 GEM_TRACE("start\n"); 3398 3399 /* 3400 * Before unwedging, make sure that all pending operations 3401 * are flushed and errored out - we may have requests waiting upon 3402 * third party fences. 
We marked all inflight requests as EIO, and 3403 * every execbuf since returned EIO, for consistency we want all 3404 * the currently pending requests to also be marked as EIO, which 3405 * is done inside our nop_submit_request - and so we must wait. 3406 * 3407 * No more can be submitted until we reset the wedged bit. 3408 */ 3409 list_for_each_entry(tl, &i915->gt.timelines, link) { 3410 struct i915_request *rq; 3411 3412 rq = i915_gem_active_peek(&tl->last_request, 3413 &i915->drm.struct_mutex); 3414 if (!rq) 3415 continue; 3416 3417 /* 3418 * We can't use our normal waiter as we want to 3419 * avoid recursively trying to handle the current 3420 * reset. The basic dma_fence_default_wait() installs 3421 * a callback for dma_fence_signal(), which is 3422 * triggered by our nop handler (indirectly, the 3423 * callback enables the signaler thread which is 3424 * woken by the nop_submit_request() advancing the seqno 3425 * and when the seqno passes the fence, the signaler 3426 * then signals the fence waking us up). 3427 */ 3428 if (dma_fence_default_wait(&rq->fence, true, 3429 MAX_SCHEDULE_TIMEOUT) < 0) 3430 return false; 3431 } 3432 i915_retire_requests(i915); 3433 GEM_BUG_ON(i915->gt.active_requests); 3434 3435 if (!intel_gpu_reset(i915, ALL_ENGINES)) 3436 intel_engines_sanitize(i915); 3437 3438 /* 3439 * Undo nop_submit_request. We prevent all new i915 requests from 3440 * being queued (by disallowing execbuf whilst wedged) so having 3441 * waited for all active requests above, we know the system is idle 3442 * and do not have to worry about a thread being inside 3443 * engine->submit_request() as we swap over. So unlike installing 3444 * the nop_submit_request on reset, we can do this from normal 3445 * context and do not require stop_machine(). 3446 */ 3447 intel_engines_reset_default_submission(i915); 3448 i915_gem_contexts_lost(i915); 3449 3450 GEM_TRACE("end\n"); 3451 3452 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3453 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3454 3455 return true; 3456 } 3457 3458 static void 3459 i915_gem_retire_work_handler(struct work_struct *work) 3460 { 3461 struct drm_i915_private *dev_priv = 3462 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3463 struct drm_device *dev = &dev_priv->drm; 3464 3465 /* Come back later if the device is busy... */ 3466 if (mutex_trylock(&dev->struct_mutex)) { 3467 i915_retire_requests(dev_priv); 3468 mutex_unlock(&dev->struct_mutex); 3469 } 3470 3471 /* 3472 * Keep the retire handler running until we are finally idle. 3473 * We do not need to do this test under locking as in the worst-case 3474 * we queue the retire worker once too often. 3475 */ 3476 if (READ_ONCE(dev_priv->gt.awake)) 3477 queue_delayed_work(dev_priv->wq, 3478 &dev_priv->gt.retire_work, 3479 round_jiffies_up_relative(HZ)); 3480 } 3481 3482 static void shrink_caches(struct drm_i915_private *i915) 3483 { 3484 /* 3485 * kmem_cache_shrink() discards empty slabs and reorders partially 3486 * filled slabs to prioritise allocating from the mostly full slabs, 3487 * with the aim of reducing fragmentation. 
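	 *
	 * Note that these are only the driver's own slab caches for its
	 * bookkeeping structures (requests, vmas, objects and so on); the
	 * backing pages of GEM objects are reclaimed via the shrinker, not
	 * here.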
3488 */ 3489 kmem_cache_shrink(i915->priorities); 3490 kmem_cache_shrink(i915->dependencies); 3491 kmem_cache_shrink(i915->requests); 3492 kmem_cache_shrink(i915->luts); 3493 kmem_cache_shrink(i915->vmas); 3494 kmem_cache_shrink(i915->objects); 3495 } 3496 3497 struct sleep_rcu_work { 3498 union { 3499 struct rcu_head rcu; 3500 struct work_struct work; 3501 }; 3502 struct drm_i915_private *i915; 3503 unsigned int epoch; 3504 }; 3505 3506 static inline bool 3507 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3508 { 3509 /* 3510 * There is a small chance that the epoch wrapped since we started 3511 * sleeping. If we assume that epoch is at least a u32, then it will 3512 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3513 */ 3514 return epoch == READ_ONCE(i915->gt.epoch); 3515 } 3516 3517 static void __sleep_work(struct work_struct *work) 3518 { 3519 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3520 struct drm_i915_private *i915 = s->i915; 3521 unsigned int epoch = s->epoch; 3522 3523 kfree(s); 3524 if (same_epoch(i915, epoch)) 3525 shrink_caches(i915); 3526 } 3527 3528 static void __sleep_rcu(struct rcu_head *rcu) 3529 { 3530 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3531 struct drm_i915_private *i915 = s->i915; 3532 3533 destroy_rcu_head(&s->rcu); 3534 3535 if (same_epoch(i915, s->epoch)) { 3536 INIT_WORK(&s->work, __sleep_work); 3537 queue_work(i915->wq, &s->work); 3538 } else { 3539 kfree(s); 3540 } 3541 } 3542 3543 static inline bool 3544 new_requests_since_last_retire(const struct drm_i915_private *i915) 3545 { 3546 return (READ_ONCE(i915->gt.active_requests) || 3547 work_pending(&i915->gt.idle_work.work)); 3548 } 3549 3550 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3551 { 3552 struct intel_engine_cs *engine; 3553 enum intel_engine_id id; 3554 3555 if (i915_terminally_wedged(&i915->gpu_error)) 3556 return; 3557 3558 GEM_BUG_ON(i915->gt.active_requests); 3559 for_each_engine(engine, i915, id) { 3560 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3561 GEM_BUG_ON(engine->last_retired_context != 3562 to_intel_context(i915->kernel_context, engine)); 3563 } 3564 } 3565 3566 static void 3567 i915_gem_idle_work_handler(struct work_struct *work) 3568 { 3569 struct drm_i915_private *dev_priv = 3570 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3571 unsigned int epoch = I915_EPOCH_INVALID; 3572 bool rearm_hangcheck; 3573 3574 if (!READ_ONCE(dev_priv->gt.awake)) 3575 return; 3576 3577 if (READ_ONCE(dev_priv->gt.active_requests)) 3578 return; 3579 3580 /* 3581 * Flush out the last user context, leaving only the pinned 3582 * kernel context resident. When we are idling on the kernel_context, 3583 * no more new requests (with a context switch) are emitted and we 3584 * can finally rest. A consequence is that the idle work handler is 3585 * always called at least twice before idling (and if the system is 3586 * idle that implies a round trip through the retire worker). 3587 */ 3588 mutex_lock(&dev_priv->drm.struct_mutex); 3589 i915_gem_switch_to_kernel_context(dev_priv); 3590 mutex_unlock(&dev_priv->drm.struct_mutex); 3591 3592 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3593 READ_ONCE(dev_priv->gt.active_requests)); 3594 3595 /* 3596 * Wait for last execlists context complete, but bail out in case a 3597 * new request is submitted. As we don't trust the hardware, we 3598 * continue on if the wait times out. 
This is necessary to allow 3599 * the machine to suspend even if the hardware dies, and we will 3600 * try to recover in resume (after depriving the hardware of power, 3601 * it may be in a better mmod). 3602 */ 3603 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3604 intel_engines_are_idle(dev_priv), 3605 I915_IDLE_ENGINES_TIMEOUT * 1000, 3606 10, 500); 3607 3608 rearm_hangcheck = 3609 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3610 3611 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3612 /* Currently busy, come back later */ 3613 mod_delayed_work(dev_priv->wq, 3614 &dev_priv->gt.idle_work, 3615 msecs_to_jiffies(50)); 3616 goto out_rearm; 3617 } 3618 3619 /* 3620 * New request retired after this work handler started, extend active 3621 * period until next instance of the work. 3622 */ 3623 if (new_requests_since_last_retire(dev_priv)) 3624 goto out_unlock; 3625 3626 epoch = __i915_gem_park(dev_priv); 3627 3628 assert_kernel_context_is_current(dev_priv); 3629 3630 rearm_hangcheck = false; 3631 out_unlock: 3632 mutex_unlock(&dev_priv->drm.struct_mutex); 3633 3634 out_rearm: 3635 if (rearm_hangcheck) { 3636 GEM_BUG_ON(!dev_priv->gt.awake); 3637 i915_queue_hangcheck(dev_priv); 3638 } 3639 3640 /* 3641 * When we are idle, it is an opportune time to reap our caches. 3642 * However, we have many objects that utilise RCU and the ordered 3643 * i915->wq that this work is executing on. To try and flush any 3644 * pending frees now we are idle, we first wait for an RCU grace 3645 * period, and then queue a task (that will run last on the wq) to 3646 * shrink and re-optimize the caches. 3647 */ 3648 if (same_epoch(dev_priv, epoch)) { 3649 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3650 if (s) { 3651 init_rcu_head(&s->rcu); 3652 s->i915 = dev_priv; 3653 s->epoch = epoch; 3654 call_rcu(&s->rcu, __sleep_rcu); 3655 } 3656 } 3657 } 3658 3659 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3660 { 3661 struct drm_i915_private *i915 = to_i915(gem->dev); 3662 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3663 struct drm_i915_file_private *fpriv = file->driver_priv; 3664 struct i915_lut_handle *lut, *ln; 3665 3666 mutex_lock(&i915->drm.struct_mutex); 3667 3668 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3669 struct i915_gem_context *ctx = lut->ctx; 3670 struct i915_vma *vma; 3671 3672 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3673 if (ctx->file_priv != fpriv) 3674 continue; 3675 3676 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3677 GEM_BUG_ON(vma->obj != obj); 3678 3679 /* We allow the process to have multiple handles to the same 3680 * vma, in the same fd namespace, by virtue of flink/open. 
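		 *
		 * (Illustrative userspace sketch of how such duplicate
		 * handles arise, not part of this file; fd and handle are
		 * assumed to exist:
		 *
		 *	struct drm_gem_flink flink = { .handle = handle };
		 *	struct drm_gem_open open_arg = { 0 };
		 *
		 *	ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
		 *	open_arg.name = flink.name;
		 *	ioctl(fd, DRM_IOCTL_GEM_OPEN, &open_arg);
		 *
		 * after which open_arg.handle is a second handle to the same
		 * object in the same fd namespace.)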
		 */
		GEM_BUG_ON(!vma->open_count);
		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
			i915_vma_close(vma);

		list_del(&lut->obj_link);
		list_del(&lut->ctx_link);

		kmem_cache_free(i915->luts, lut);
		__i915_gem_object_release_unless_active(obj);
	}

	mutex_unlock(&i915->drm.struct_mutex);
}

static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: incomplete, restart syscall
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static long wait_for_timeline(struct i915_timeline *tl,
			      unsigned int flags, long timeout)
{
	struct i915_request *rq;

	rq = i915_gem_active_get_unlocked(&tl->last_request);
	if (!rq)
		return timeout;

	/*
	 * "Race-to-idle".
	 *
	 * Switching to the kernel context is often used as a synchronous
	 * step prior to idling, e.g. in suspend for flushing all
	 * current operations to memory before sleeping. These we
	 * want to complete as quickly as possible to avoid prolonged
	 * stalls, so allow the gpu to boost to maximum clocks.
3796 */ 3797 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3798 gen6_rps_boost(rq, NULL); 3799 3800 timeout = i915_request_wait(rq, flags, timeout); 3801 i915_request_put(rq); 3802 3803 return timeout; 3804 } 3805 3806 static int wait_for_engines(struct drm_i915_private *i915) 3807 { 3808 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3809 dev_err(i915->drm.dev, 3810 "Failed to idle engines, declaring wedged!\n"); 3811 GEM_TRACE_DUMP(); 3812 i915_gem_set_wedged(i915); 3813 return -EIO; 3814 } 3815 3816 return 0; 3817 } 3818 3819 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3820 unsigned int flags, long timeout) 3821 { 3822 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3823 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3824 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3825 3826 /* If the device is asleep, we have no requests outstanding */ 3827 if (!READ_ONCE(i915->gt.awake)) 3828 return 0; 3829 3830 if (flags & I915_WAIT_LOCKED) { 3831 struct i915_timeline *tl; 3832 int err; 3833 3834 lockdep_assert_held(&i915->drm.struct_mutex); 3835 3836 list_for_each_entry(tl, &i915->gt.timelines, link) { 3837 timeout = wait_for_timeline(tl, flags, timeout); 3838 if (timeout < 0) 3839 return timeout; 3840 } 3841 if (GEM_SHOW_DEBUG() && !timeout) { 3842 /* Presume that timeout was non-zero to begin with! */ 3843 dev_warn(&i915->drm.pdev->dev, 3844 "Missed idle-completion interrupt!\n"); 3845 GEM_TRACE_DUMP(); 3846 } 3847 3848 err = wait_for_engines(i915); 3849 if (err) 3850 return err; 3851 3852 i915_retire_requests(i915); 3853 GEM_BUG_ON(i915->gt.active_requests); 3854 } else { 3855 struct intel_engine_cs *engine; 3856 enum intel_engine_id id; 3857 3858 for_each_engine(engine, i915, id) { 3859 struct i915_timeline *tl = &engine->timeline; 3860 3861 timeout = wait_for_timeline(tl, flags, timeout); 3862 if (timeout < 0) 3863 return timeout; 3864 } 3865 } 3866 3867 return 0; 3868 } 3869 3870 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3871 { 3872 /* 3873 * We manually flush the CPU domain so that we can override and 3874 * force the flush for the display, and perform it asyncrhonously. 3875 */ 3876 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3877 if (obj->cache_dirty) 3878 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3879 obj->write_domain = 0; 3880 } 3881 3882 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3883 { 3884 if (!READ_ONCE(obj->pin_global)) 3885 return; 3886 3887 mutex_lock(&obj->base.dev->struct_mutex); 3888 __i915_gem_object_flush_for_display(obj); 3889 mutex_unlock(&obj->base.dev->struct_mutex); 3890 } 3891 3892 /** 3893 * Moves a single object to the WC read, and possibly write domain. 3894 * @obj: object to act on 3895 * @write: ask for write access or read only 3896 * 3897 * This function returns when the move is complete, including waiting on 3898 * flushes to occur. 3899 */ 3900 int 3901 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3902 { 3903 int ret; 3904 3905 lockdep_assert_held(&obj->base.dev->struct_mutex); 3906 3907 ret = i915_gem_object_wait(obj, 3908 I915_WAIT_INTERRUPTIBLE | 3909 I915_WAIT_LOCKED | 3910 (write ? 
I915_WAIT_ALL : 0), 3911 MAX_SCHEDULE_TIMEOUT, 3912 NULL); 3913 if (ret) 3914 return ret; 3915 3916 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3917 return 0; 3918 3919 /* Flush and acquire obj->pages so that we are coherent through 3920 * direct access in memory with previous cached writes through 3921 * shmemfs and that our cache domain tracking remains valid. 3922 * For example, if the obj->filp was moved to swap without us 3923 * being notified and releasing the pages, we would mistakenly 3924 * continue to assume that the obj remained out of the CPU cached 3925 * domain. 3926 */ 3927 ret = i915_gem_object_pin_pages(obj); 3928 if (ret) 3929 return ret; 3930 3931 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3932 3933 /* Serialise direct access to this object with the barriers for 3934 * coherent writes from the GPU, by effectively invalidating the 3935 * WC domain upon first access. 3936 */ 3937 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3938 mb(); 3939 3940 /* It should now be out of any other write domains, and we can update 3941 * the domain values for our changes. 3942 */ 3943 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3944 obj->read_domains |= I915_GEM_DOMAIN_WC; 3945 if (write) { 3946 obj->read_domains = I915_GEM_DOMAIN_WC; 3947 obj->write_domain = I915_GEM_DOMAIN_WC; 3948 obj->mm.dirty = true; 3949 } 3950 3951 i915_gem_object_unpin_pages(obj); 3952 return 0; 3953 } 3954 3955 /** 3956 * Moves a single object to the GTT read, and possibly write domain. 3957 * @obj: object to act on 3958 * @write: ask for write access or read only 3959 * 3960 * This function returns when the move is complete, including waiting on 3961 * flushes to occur. 3962 */ 3963 int 3964 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3965 { 3966 int ret; 3967 3968 lockdep_assert_held(&obj->base.dev->struct_mutex); 3969 3970 ret = i915_gem_object_wait(obj, 3971 I915_WAIT_INTERRUPTIBLE | 3972 I915_WAIT_LOCKED | 3973 (write ? I915_WAIT_ALL : 0), 3974 MAX_SCHEDULE_TIMEOUT, 3975 NULL); 3976 if (ret) 3977 return ret; 3978 3979 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3980 return 0; 3981 3982 /* Flush and acquire obj->pages so that we are coherent through 3983 * direct access in memory with previous cached writes through 3984 * shmemfs and that our cache domain tracking remains valid. 3985 * For example, if the obj->filp was moved to swap without us 3986 * being notified and releasing the pages, we would mistakenly 3987 * continue to assume that the obj remained out of the CPU cached 3988 * domain. 3989 */ 3990 ret = i915_gem_object_pin_pages(obj); 3991 if (ret) 3992 return ret; 3993 3994 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3995 3996 /* Serialise direct access to this object with the barriers for 3997 * coherent writes from the GPU, by effectively invalidating the 3998 * GTT domain upon first access. 3999 */ 4000 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 4001 mb(); 4002 4003 /* It should now be out of any other write domains, and we can update 4004 * the domain values for our changes. 4005 */ 4006 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4007 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4008 if (write) { 4009 obj->read_domains = I915_GEM_DOMAIN_GTT; 4010 obj->write_domain = I915_GEM_DOMAIN_GTT; 4011 obj->mm.dirty = true; 4012 } 4013 4014 i915_gem_object_unpin_pages(obj); 4015 return 0; 4016 } 4017 4018 /** 4019 * Changes the cache-level of an object across all VMA. 
4020 * @obj: object to act on 4021 * @cache_level: new cache level to set for the object 4022 * 4023 * After this function returns, the object will be in the new cache-level 4024 * across all GTT and the contents of the backing storage will be coherent, 4025 * with respect to the new cache-level. In order to keep the backing storage 4026 * coherent for all users, we only allow a single cache level to be set 4027 * globally on the object and prevent it from being changed whilst the 4028 * hardware is reading from the object. That is, if the object is currently 4029 * on the scanout it will be set to uncached (or equivalent display 4030 * cache coherency) and all non-MOCS GPU access will also be uncached so 4031 * that all direct access to the scanout remains coherent. 4032 */ 4033 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4034 enum i915_cache_level cache_level) 4035 { 4036 struct i915_vma *vma; 4037 int ret; 4038 4039 lockdep_assert_held(&obj->base.dev->struct_mutex); 4040 4041 if (obj->cache_level == cache_level) 4042 return 0; 4043 4044 /* Inspect the list of currently bound VMA and unbind any that would 4045 * be invalid given the new cache-level. This is principally to 4046 * catch the issue of the CS prefetch crossing page boundaries and 4047 * reading an invalid PTE on older architectures. 4048 */ 4049 restart: 4050 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4051 if (!drm_mm_node_allocated(&vma->node)) 4052 continue; 4053 4054 if (i915_vma_is_pinned(vma)) { 4055 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4056 return -EBUSY; 4057 } 4058 4059 if (!i915_vma_is_closed(vma) && 4060 i915_gem_valid_gtt_space(vma, cache_level)) 4061 continue; 4062 4063 ret = i915_vma_unbind(vma); 4064 if (ret) 4065 return ret; 4066 4067 /* As unbinding may affect other elements in the 4068 * obj->vma_list (due to side-effects from retiring 4069 * an active vma), play safe and restart the iterator. 4070 */ 4071 goto restart; 4072 } 4073 4074 /* We can reuse the existing drm_mm nodes but need to change the 4075 * cache-level on the PTE. We could simply unbind them all and 4076 * rebind with the correct cache-level on next use. However since 4077 * we already have a valid slot, dma mapping, pages etc, we may as 4078 * well rewrite the PTE in the belief that doing so tramples upon less 4079 * state and so involves less work. 4080 */ 4081 if (obj->bind_count) { 4082 /* Before we change the PTE, the GPU must not be accessing it. 4083 * If we wait upon the object, we know that all the bound 4084 * VMA are no longer active. 4085 */ 4086 ret = i915_gem_object_wait(obj, 4087 I915_WAIT_INTERRUPTIBLE | 4088 I915_WAIT_LOCKED | 4089 I915_WAIT_ALL, 4090 MAX_SCHEDULE_TIMEOUT, 4091 NULL); 4092 if (ret) 4093 return ret; 4094 4095 if (!HAS_LLC(to_i915(obj->base.dev)) && 4096 cache_level != I915_CACHE_NONE) { 4097 /* Access to snoopable pages through the GTT is 4098 * incoherent and on some machines causes a hard 4099 * lockup. Relinquish the CPU mmapping to force 4100 * userspace to refault in the pages and we can 4101 * then double check if the GTT mapping is still 4102 * valid for that pointer access. 4103 */ 4104 i915_gem_release_mmap(obj); 4105 4106 /* As we no longer need a fence for GTT access, 4107 * we can relinquish it now (and so prevent having 4108 * to steal a fence from someone else on the next 4109 * fence request). Note GPU activity would have 4110 * dropped the fence as all snoopable access is 4111 * supposed to be linear.
4112 */ 4113 for_each_ggtt_vma(vma, obj) { 4114 ret = i915_vma_put_fence(vma); 4115 if (ret) 4116 return ret; 4117 } 4118 } else { 4119 /* We either have incoherent backing store and 4120 * so no GTT access or the architecture is fully 4121 * coherent. In such cases, existing GTT mmaps 4122 * ignore the cache bit in the PTE and we can 4123 * rewrite it without confusing the GPU or having 4124 * to force userspace to fault back in its mmaps. 4125 */ 4126 } 4127 4128 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4129 if (!drm_mm_node_allocated(&vma->node)) 4130 continue; 4131 4132 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4133 if (ret) 4134 return ret; 4135 } 4136 } 4137 4138 list_for_each_entry(vma, &obj->vma_list, obj_link) 4139 vma->node.color = cache_level; 4140 i915_gem_object_set_cache_coherency(obj, cache_level); 4141 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4142 4143 return 0; 4144 } 4145 4146 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4147 struct drm_file *file) 4148 { 4149 struct drm_i915_gem_caching *args = data; 4150 struct drm_i915_gem_object *obj; 4151 int err = 0; 4152 4153 rcu_read_lock(); 4154 obj = i915_gem_object_lookup_rcu(file, args->handle); 4155 if (!obj) { 4156 err = -ENOENT; 4157 goto out; 4158 } 4159 4160 switch (obj->cache_level) { 4161 case I915_CACHE_LLC: 4162 case I915_CACHE_L3_LLC: 4163 args->caching = I915_CACHING_CACHED; 4164 break; 4165 4166 case I915_CACHE_WT: 4167 args->caching = I915_CACHING_DISPLAY; 4168 break; 4169 4170 default: 4171 args->caching = I915_CACHING_NONE; 4172 break; 4173 } 4174 out: 4175 rcu_read_unlock(); 4176 return err; 4177 } 4178 4179 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4180 struct drm_file *file) 4181 { 4182 struct drm_i915_private *i915 = to_i915(dev); 4183 struct drm_i915_gem_caching *args = data; 4184 struct drm_i915_gem_object *obj; 4185 enum i915_cache_level level; 4186 int ret = 0; 4187 4188 switch (args->caching) { 4189 case I915_CACHING_NONE: 4190 level = I915_CACHE_NONE; 4191 break; 4192 case I915_CACHING_CACHED: 4193 /* 4194 * Due to a HW issue on BXT A stepping, GPU stores via a 4195 * snooped mapping may leave stale data in a corresponding CPU 4196 * cacheline, whereas normally such cachelines would get 4197 * invalidated. 4198 */ 4199 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4200 return -ENODEV; 4201 4202 level = I915_CACHE_LLC; 4203 break; 4204 case I915_CACHING_DISPLAY: 4205 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4206 break; 4207 default: 4208 return -EINVAL; 4209 } 4210 4211 obj = i915_gem_object_lookup(file, args->handle); 4212 if (!obj) 4213 return -ENOENT; 4214 4215 /* 4216 * The caching mode of proxy object is handled by its generator, and 4217 * not allowed to be changed by userspace. 4218 */ 4219 if (i915_gem_object_is_proxy(obj)) { 4220 ret = -ENXIO; 4221 goto out; 4222 } 4223 4224 if (obj->cache_level == level) 4225 goto out; 4226 4227 ret = i915_gem_object_wait(obj, 4228 I915_WAIT_INTERRUPTIBLE, 4229 MAX_SCHEDULE_TIMEOUT, 4230 to_rps_client(file)); 4231 if (ret) 4232 goto out; 4233 4234 ret = i915_mutex_lock_interruptible(dev); 4235 if (ret) 4236 goto out; 4237 4238 ret = i915_gem_object_set_cache_level(obj, level); 4239 mutex_unlock(&dev->struct_mutex); 4240 4241 out: 4242 i915_gem_object_put(obj); 4243 return ret; 4244 } 4245 4246 /* 4247 * Prepare buffer for display plane (scanout, cursors, etc). 
Can be called from 4248 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4249 * (for pageflips). We only flush the caches while preparing the buffer for 4250 * display, the callers are responsible for frontbuffer flush. 4251 */ 4252 struct i915_vma * 4253 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4254 u32 alignment, 4255 const struct i915_ggtt_view *view, 4256 unsigned int flags) 4257 { 4258 struct i915_vma *vma; 4259 int ret; 4260 4261 lockdep_assert_held(&obj->base.dev->struct_mutex); 4262 4263 /* Mark the global pin early so that we account for the 4264 * display coherency whilst setting up the cache domains. 4265 */ 4266 obj->pin_global++; 4267 4268 /* The display engine is not coherent with the LLC cache on gen6. As 4269 * a result, we make sure that the pinning that is about to occur is 4270 * done with uncached PTEs. This is lowest common denominator for all 4271 * chipsets. 4272 * 4273 * However for gen6+, we could do better by using the GFDT bit instead 4274 * of uncaching, which would allow us to flush all the LLC-cached data 4275 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4276 */ 4277 ret = i915_gem_object_set_cache_level(obj, 4278 HAS_WT(to_i915(obj->base.dev)) ? 4279 I915_CACHE_WT : I915_CACHE_NONE); 4280 if (ret) { 4281 vma = ERR_PTR(ret); 4282 goto err_unpin_global; 4283 } 4284 4285 /* As the user may map the buffer once pinned in the display plane 4286 * (e.g. libkms for the bootup splash), we have to ensure that we 4287 * always use map_and_fenceable for all scanout buffers. However, 4288 * it may simply be too big to fit into mappable, in which case 4289 * put it anyway and hope that userspace can cope (but always first 4290 * try to preserve the existing ABI). 4291 */ 4292 vma = ERR_PTR(-ENOSPC); 4293 if ((flags & PIN_MAPPABLE) == 0 && 4294 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4295 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4296 flags | 4297 PIN_MAPPABLE | 4298 PIN_NONBLOCK); 4299 if (IS_ERR(vma)) 4300 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4301 if (IS_ERR(vma)) 4302 goto err_unpin_global; 4303 4304 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4305 4306 __i915_gem_object_flush_for_display(obj); 4307 4308 /* It should now be out of any other write domains, and we can update 4309 * the domain values for our changes. 4310 */ 4311 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4312 4313 return vma; 4314 4315 err_unpin_global: 4316 obj->pin_global--; 4317 return vma; 4318 } 4319 4320 void 4321 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4322 { 4323 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4324 4325 if (WARN_ON(vma->obj->pin_global == 0)) 4326 return; 4327 4328 if (--vma->obj->pin_global == 0) 4329 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4330 4331 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4332 i915_gem_object_bump_inactive_ggtt(vma->obj); 4333 4334 i915_vma_unpin(vma); 4335 } 4336 4337 /** 4338 * Moves a single object to the CPU read, and possibly write domain. 4339 * @obj: object to act on 4340 * @write: requesting write or read-only access 4341 * 4342 * This function returns when the move is complete, including waiting on 4343 * flushes to occur. 
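 *
 * A usage sketch, mirroring __intel_engines_record_defaults() later in
 * this file, which moves the saved default context image to the CPU
 * domain (read-only) before mapping and inspecting it:
 *
 *	err = i915_gem_object_set_to_cpu_domain(state->obj, false);
 *	if (err)
 *		return err;
 *	vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);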
4344 */ 4345 int 4346 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4347 { 4348 int ret; 4349 4350 lockdep_assert_held(&obj->base.dev->struct_mutex); 4351 4352 ret = i915_gem_object_wait(obj, 4353 I915_WAIT_INTERRUPTIBLE | 4354 I915_WAIT_LOCKED | 4355 (write ? I915_WAIT_ALL : 0), 4356 MAX_SCHEDULE_TIMEOUT, 4357 NULL); 4358 if (ret) 4359 return ret; 4360 4361 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4362 4363 /* Flush the CPU cache if it's still invalid. */ 4364 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4365 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4366 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4367 } 4368 4369 /* It should now be out of any other write domains, and we can update 4370 * the domain values for our changes. 4371 */ 4372 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4373 4374 /* If we're writing through the CPU, then the GPU read domains will 4375 * need to be invalidated at next use. 4376 */ 4377 if (write) 4378 __start_cpu_write(obj); 4379 4380 return 0; 4381 } 4382 4383 /* Throttle our rendering by waiting until the ring has completed our requests 4384 * emitted over 20 msec ago. 4385 * 4386 * Note that if we were to use the current jiffies each time around the loop, 4387 * we wouldn't escape the function with any frames outstanding if the time to 4388 * render a frame was over 20ms. 4389 * 4390 * This should get us reasonable parallelism between CPU and GPU but also 4391 * relatively low latency when blocking on a particular request to finish. 4392 */ 4393 static int 4394 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4395 { 4396 struct drm_i915_private *dev_priv = to_i915(dev); 4397 struct drm_i915_file_private *file_priv = file->driver_priv; 4398 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4399 struct i915_request *request, *target = NULL; 4400 long ret; 4401 4402 /* ABI: return -EIO if already wedged */ 4403 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4404 return -EIO; 4405 4406 spin_lock(&file_priv->mm.lock); 4407 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4408 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4409 break; 4410 4411 if (target) { 4412 list_del(&target->client_link); 4413 target->file_priv = NULL; 4414 } 4415 4416 target = request; 4417 } 4418 if (target) 4419 i915_request_get(target); 4420 spin_unlock(&file_priv->mm.lock); 4421 4422 if (target == NULL) 4423 return 0; 4424 4425 ret = i915_request_wait(target, 4426 I915_WAIT_INTERRUPTIBLE, 4427 MAX_SCHEDULE_TIMEOUT); 4428 i915_request_put(target); 4429 4430 return ret < 0 ? ret : 0; 4431 } 4432 4433 struct i915_vma * 4434 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4435 const struct i915_ggtt_view *view, 4436 u64 size, 4437 u64 alignment, 4438 u64 flags) 4439 { 4440 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4441 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4442 struct i915_vma *vma; 4443 int ret; 4444 4445 lockdep_assert_held(&obj->base.dev->struct_mutex); 4446 4447 if (flags & PIN_MAPPABLE && 4448 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4449 /* If the required space is larger than the available 4450 * aperture, we will not be able to find a slot for the 4451 * object and unbinding the object now will be in 4452 * vain. Worse, doing so may cause us to ping-pong 4453 * the object in and out of the Global GTT and 4454 * waste a lot of cycles under the mutex.
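 *
 * Callers that can cope with an unmappable binding typically try
 * PIN_MAPPABLE | PIN_NONBLOCK first and only then retry without it,
 * e.g. a sketch of the fallback in pin_to_display_plane() above:
 *
 *	vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 *				       flags | PIN_MAPPABLE | PIN_NONBLOCK);
 *	if (IS_ERR(vma))
 *		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);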
4455 */ 4456 if (obj->base.size > dev_priv->ggtt.mappable_end) 4457 return ERR_PTR(-E2BIG); 4458 4459 /* If NONBLOCK is set the caller is optimistically 4460 * trying to cache the full object within the mappable 4461 * aperture, and *must* have a fallback in place for 4462 * situations where we cannot bind the object. We 4463 * can be a little more lax here and use the fallback 4464 * more often to avoid costly migrations of ourselves 4465 * and other objects within the aperture. 4466 * 4467 * Half-the-aperture is used as a simple heuristic. 4468 * More interesting would be to do a search for a free 4469 * block prior to making the commitment to unbind. 4470 * That caters for the self-harm case, and with a 4471 * little more heuristics (e.g. NOFAULT, NOEVICT) 4472 * we could try to minimise harm to others. 4473 */ 4474 if (flags & PIN_NONBLOCK && 4475 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4476 return ERR_PTR(-ENOSPC); 4477 } 4478 4479 vma = i915_vma_instance(obj, vm, view); 4480 if (unlikely(IS_ERR(vma))) 4481 return vma; 4482 4483 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4484 if (flags & PIN_NONBLOCK) { 4485 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4486 return ERR_PTR(-ENOSPC); 4487 4488 if (flags & PIN_MAPPABLE && 4489 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4490 return ERR_PTR(-ENOSPC); 4491 } 4492 4493 WARN(i915_vma_is_pinned(vma), 4494 "bo is already pinned in ggtt with incorrect alignment:" 4495 " offset=%08x, req.alignment=%llx," 4496 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4497 i915_ggtt_offset(vma), alignment, 4498 !!(flags & PIN_MAPPABLE), 4499 i915_vma_is_map_and_fenceable(vma)); 4500 ret = i915_vma_unbind(vma); 4501 if (ret) 4502 return ERR_PTR(ret); 4503 } 4504 4505 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4506 if (ret) 4507 return ERR_PTR(ret); 4508 4509 return vma; 4510 } 4511 4512 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4513 { 4514 /* Note that we could alias engines in the execbuf API, but 4515 * that would be very unwise as it prevents userspace from 4516 * exercising fine control over engine selection. Ahem. 4517 * 4518 * This should be something like EXEC_MAX_ENGINE instead of 4519 * I915_NUM_ENGINES. 4520 */ 4521 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4522 return 0x10000 << id; 4523 } 4524 4525 static __always_inline unsigned int __busy_write_id(unsigned int id) 4526 { 4527 /* The uABI guarantees an active writer is also amongst the read 4528 * engines. This would be true if we accessed the activity tracking 4529 * under the lock, but as we perform the lookup of the object and 4530 * its activity locklessly we cannot guarantee that the last_write 4531 * being active implies that we have set the same engine flag from 4532 * last_read - hence we always set both read and write busy for 4533 * last_write. 4534 */ 4535 return id | __busy_read_flag(id); 4536 } 4537 4538 static __always_inline unsigned int 4539 __busy_set_if_active(const struct dma_fence *fence, 4540 unsigned int (*flag)(unsigned int id)) 4541 { 4542 struct i915_request *rq; 4543 4544 /* We have to check the current hw status of the fence as the uABI 4545 * guarantees forward progress. We could rely on the idle worker 4546 * to eventually flush us, but to minimise latency just ask the 4547 * hardware. 4548 * 4549 * Note we only report on the status of native fences.
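 *
 * As a worked example of the encoding above: a request still executing
 * on the engine with uabi id 1 contributes __busy_read_flag(1) ==
 * 0x10000 << 1 == 0x20000 to the read set, and __busy_write_id(1) ==
 * 1 | 0x20000 == 0x20001 if that engine is also the last writer.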
4550 */ 4551 if (!dma_fence_is_i915(fence)) 4552 return 0; 4553 4554 /* opencode to_request() in order to avoid const warnings */ 4555 rq = container_of(fence, struct i915_request, fence); 4556 if (i915_request_completed(rq)) 4557 return 0; 4558 4559 return flag(rq->engine->uabi_id); 4560 } 4561 4562 static __always_inline unsigned int 4563 busy_check_reader(const struct dma_fence *fence) 4564 { 4565 return __busy_set_if_active(fence, __busy_read_flag); 4566 } 4567 4568 static __always_inline unsigned int 4569 busy_check_writer(const struct dma_fence *fence) 4570 { 4571 if (!fence) 4572 return 0; 4573 4574 return __busy_set_if_active(fence, __busy_write_id); 4575 } 4576 4577 int 4578 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4579 struct drm_file *file) 4580 { 4581 struct drm_i915_gem_busy *args = data; 4582 struct drm_i915_gem_object *obj; 4583 struct reservation_object_list *list; 4584 unsigned int seq; 4585 int err; 4586 4587 err = -ENOENT; 4588 rcu_read_lock(); 4589 obj = i915_gem_object_lookup_rcu(file, args->handle); 4590 if (!obj) 4591 goto out; 4592 4593 /* A discrepancy here is that we do not report the status of 4594 * non-i915 fences, i.e. even though we may report the object as idle, 4595 * a call to set-domain may still stall waiting for foreign rendering. 4596 * This also means that wait-ioctl may report an object as busy, 4597 * where busy-ioctl considers it idle. 4598 * 4599 * We trade the ability to warn of foreign fences to report on which 4600 * i915 engines are active for the object. 4601 * 4602 * Alternatively, we can trade that extra information on read/write 4603 * activity with 4604 * args->busy = 4605 * !reservation_object_test_signaled_rcu(obj->resv, true); 4606 * to report the overall busyness. This is what the wait-ioctl does. 
4607 * 4608 */ 4609 retry: 4610 seq = raw_read_seqcount(&obj->resv->seq); 4611 4612 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4613 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4614 4615 /* Translate shared fences to READ set of engines */ 4616 list = rcu_dereference(obj->resv->fence); 4617 if (list) { 4618 unsigned int shared_count = list->shared_count, i; 4619 4620 for (i = 0; i < shared_count; ++i) { 4621 struct dma_fence *fence = 4622 rcu_dereference(list->shared[i]); 4623 4624 args->busy |= busy_check_reader(fence); 4625 } 4626 } 4627 4628 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4629 goto retry; 4630 4631 err = 0; 4632 out: 4633 rcu_read_unlock(); 4634 return err; 4635 } 4636 4637 int 4638 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4639 struct drm_file *file_priv) 4640 { 4641 return i915_gem_ring_throttle(dev, file_priv); 4642 } 4643 4644 int 4645 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4646 struct drm_file *file_priv) 4647 { 4648 struct drm_i915_private *dev_priv = to_i915(dev); 4649 struct drm_i915_gem_madvise *args = data; 4650 struct drm_i915_gem_object *obj; 4651 int err; 4652 4653 switch (args->madv) { 4654 case I915_MADV_DONTNEED: 4655 case I915_MADV_WILLNEED: 4656 break; 4657 default: 4658 return -EINVAL; 4659 } 4660 4661 obj = i915_gem_object_lookup(file_priv, args->handle); 4662 if (!obj) 4663 return -ENOENT; 4664 4665 err = mutex_lock_interruptible(&obj->mm.lock); 4666 if (err) 4667 goto out; 4668 4669 if (i915_gem_object_has_pages(obj) && 4670 i915_gem_object_is_tiled(obj) && 4671 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4672 if (obj->mm.madv == I915_MADV_WILLNEED) { 4673 GEM_BUG_ON(!obj->mm.quirked); 4674 __i915_gem_object_unpin_pages(obj); 4675 obj->mm.quirked = false; 4676 } 4677 if (args->madv == I915_MADV_WILLNEED) { 4678 GEM_BUG_ON(obj->mm.quirked); 4679 __i915_gem_object_pin_pages(obj); 4680 obj->mm.quirked = true; 4681 } 4682 } 4683 4684 if (obj->mm.madv != __I915_MADV_PURGED) 4685 obj->mm.madv = args->madv; 4686 4687 /* if the object is no longer attached, discard its backing storage */ 4688 if (obj->mm.madv == I915_MADV_DONTNEED && 4689 !i915_gem_object_has_pages(obj)) 4690 i915_gem_object_truncate(obj); 4691 4692 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4693 mutex_unlock(&obj->mm.lock); 4694 4695 out: 4696 i915_gem_object_put(obj); 4697 return err; 4698 } 4699 4700 static void 4701 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4702 { 4703 struct drm_i915_gem_object *obj = 4704 container_of(active, typeof(*obj), frontbuffer_write); 4705 4706 intel_fb_obj_flush(obj, ORIGIN_CS); 4707 } 4708 4709 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4710 const struct drm_i915_gem_object_ops *ops) 4711 { 4712 mutex_init(&obj->mm.lock); 4713 4714 INIT_LIST_HEAD(&obj->vma_list); 4715 INIT_LIST_HEAD(&obj->lut_list); 4716 INIT_LIST_HEAD(&obj->batch_pool_link); 4717 4718 init_rcu_head(&obj->rcu); 4719 4720 obj->ops = ops; 4721 4722 reservation_object_init(&obj->__builtin_resv); 4723 obj->resv = &obj->__builtin_resv; 4724 4725 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4726 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4727 4728 obj->mm.madv = I915_MADV_WILLNEED; 4729 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4730 mutex_init(&obj->mm.get_page.lock); 4731 4732 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4733 } 4734 4735 static const struct 
drm_i915_gem_object_ops i915_gem_object_ops = { 4736 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4737 I915_GEM_OBJECT_IS_SHRINKABLE, 4738 4739 .get_pages = i915_gem_object_get_pages_gtt, 4740 .put_pages = i915_gem_object_put_pages_gtt, 4741 4742 .pwrite = i915_gem_object_pwrite_gtt, 4743 }; 4744 4745 static int i915_gem_object_create_shmem(struct drm_device *dev, 4746 struct drm_gem_object *obj, 4747 size_t size) 4748 { 4749 struct drm_i915_private *i915 = to_i915(dev); 4750 unsigned long flags = VM_NORESERVE; 4751 struct file *filp; 4752 4753 drm_gem_private_object_init(dev, obj, size); 4754 4755 if (i915->mm.gemfs) 4756 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4757 flags); 4758 else 4759 filp = shmem_file_setup("i915", size, flags); 4760 4761 if (IS_ERR(filp)) 4762 return PTR_ERR(filp); 4763 4764 obj->filp = filp; 4765 4766 return 0; 4767 } 4768 4769 struct drm_i915_gem_object * 4770 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4771 { 4772 struct drm_i915_gem_object *obj; 4773 struct address_space *mapping; 4774 unsigned int cache_level; 4775 gfp_t mask; 4776 int ret; 4777 4778 /* There is a prevalence of the assumption that we fit the object's 4779 * page count inside a 32bit _signed_ variable. Let's document this and 4780 * catch if we ever need to fix it. In the meantime, if you do spot 4781 * such a local variable, please consider fixing! 4782 */ 4783 if (size >> PAGE_SHIFT > INT_MAX) 4784 return ERR_PTR(-E2BIG); 4785 4786 if (overflows_type(size, obj->base.size)) 4787 return ERR_PTR(-E2BIG); 4788 4789 obj = i915_gem_object_alloc(dev_priv); 4790 if (obj == NULL) 4791 return ERR_PTR(-ENOMEM); 4792 4793 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4794 if (ret) 4795 goto fail; 4796 4797 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4798 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4799 /* 965gm cannot relocate objects above 4GiB. */ 4800 mask &= ~__GFP_HIGHMEM; 4801 mask |= __GFP_DMA32; 4802 } 4803 4804 mapping = obj->base.filp->f_mapping; 4805 mapping_set_gfp_mask(mapping, mask); 4806 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4807 4808 i915_gem_object_init(obj, &i915_gem_object_ops); 4809 4810 obj->write_domain = I915_GEM_DOMAIN_CPU; 4811 obj->read_domains = I915_GEM_DOMAIN_CPU; 4812 4813 if (HAS_LLC(dev_priv)) 4814 /* On some devices, we can have the GPU use the LLC (the CPU 4815 * cache) for about a 10% performance improvement 4816 * compared to uncached. Graphics requests other than 4817 * display scanout are coherent with the CPU in 4818 * accessing this cache. This means in this mode we 4819 * don't need to clflush on the CPU side, and on the 4820 * GPU side we only need to flush internal caches to 4821 * get data visible to the CPU. 4822 * 4823 * However, we maintain the display planes as UC, and so 4824 * need to rebind when first used as such. 4825 */ 4826 cache_level = I915_CACHE_LLC; 4827 else 4828 cache_level = I915_CACHE_NONE; 4829 4830 i915_gem_object_set_cache_coherency(obj, cache_level); 4831 4832 trace_i915_gem_object_create(obj); 4833 4834 return obj; 4835 4836 fail: 4837 i915_gem_object_free(obj); 4838 return ERR_PTR(ret); 4839 } 4840 4841 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4842 { 4843 /* If we are the last user of the backing storage (be it shmemfs 4844 * pages or stolen etc), we know that the pages are going to be 4845 * immediately released. In this case, we can then skip copying 4846 * back the contents from the GPU. 
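 *
 * (Objects without a shmemfs filp, e.g. stolen or internal objects,
 * are always treated as discardable by the check below.)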
4847 */ 4848 4849 if (obj->mm.madv != I915_MADV_WILLNEED) 4850 return false; 4851 4852 if (obj->base.filp == NULL) 4853 return true; 4854 4855 /* At first glance, this looks racy, but then again so would be 4856 * userspace racing mmap against close. However, the first external 4857 * reference to the filp can only be obtained through the 4858 * i915_gem_mmap_ioctl() which safeguards us against the user 4859 * acquiring such a reference whilst we are in the middle of 4860 * freeing the object. 4861 */ 4862 return atomic_long_read(&obj->base.filp->f_count) == 1; 4863 } 4864 4865 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4866 struct llist_node *freed) 4867 { 4868 struct drm_i915_gem_object *obj, *on; 4869 4870 intel_runtime_pm_get(i915); 4871 llist_for_each_entry_safe(obj, on, freed, freed) { 4872 struct i915_vma *vma, *vn; 4873 4874 trace_i915_gem_object_destroy(obj); 4875 4876 mutex_lock(&i915->drm.struct_mutex); 4877 4878 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4879 list_for_each_entry_safe(vma, vn, 4880 &obj->vma_list, obj_link) { 4881 GEM_BUG_ON(i915_vma_is_active(vma)); 4882 vma->flags &= ~I915_VMA_PIN_MASK; 4883 i915_vma_destroy(vma); 4884 } 4885 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4886 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4887 4888 /* This serializes freeing with the shrinker. Since the free 4889 * is delayed, first by RCU then by the workqueue, we want the 4890 * shrinker to be able to free pages of unreferenced objects, 4891 * or else we may oom whilst there are plenty of deferred 4892 * freed objects. 4893 */ 4894 if (i915_gem_object_has_pages(obj)) { 4895 spin_lock(&i915->mm.obj_lock); 4896 list_del_init(&obj->mm.link); 4897 spin_unlock(&i915->mm.obj_lock); 4898 } 4899 4900 mutex_unlock(&i915->drm.struct_mutex); 4901 4902 GEM_BUG_ON(obj->bind_count); 4903 GEM_BUG_ON(obj->userfault_count); 4904 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4905 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4906 4907 if (obj->ops->release) 4908 obj->ops->release(obj); 4909 4910 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4911 atomic_set(&obj->mm.pages_pin_count, 0); 4912 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4913 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4914 4915 if (obj->base.import_attach) 4916 drm_prime_gem_destroy(&obj->base, NULL); 4917 4918 reservation_object_fini(&obj->__builtin_resv); 4919 drm_gem_object_release(&obj->base); 4920 i915_gem_info_remove_obj(i915, obj->base.size); 4921 4922 kfree(obj->bit_17); 4923 i915_gem_object_free(obj); 4924 4925 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4926 atomic_dec(&i915->mm.free_count); 4927 4928 if (on) 4929 cond_resched(); 4930 } 4931 intel_runtime_pm_put(i915); 4932 } 4933 4934 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4935 { 4936 struct llist_node *freed; 4937 4938 /* Free the oldest, most stale object to keep the free_list short */ 4939 freed = NULL; 4940 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4941 /* Only one consumer of llist_del_first() allowed */ 4942 spin_lock(&i915->mm.free_lock); 4943 freed = llist_del_first(&i915->mm.free_list); 4944 spin_unlock(&i915->mm.free_lock); 4945 } 4946 if (unlikely(freed)) { 4947 freed->next = NULL; 4948 __i915_gem_free_objects(i915, freed); 4949 } 4950 } 4951 4952 static void __i915_gem_free_work(struct work_struct *work) 4953 { 4954 struct drm_i915_private *i915 = 4955 container_of(work, struct drm_i915_private, mm.free_work); 4956 struct llist_node *freed; 4957 4958 /* 4959 * All 
file-owned VMA should have been released by this point through 4960 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4961 * However, the object may also be bound into the global GTT (e.g. 4962 * older GPUs without per-process support, or for direct access through 4963 * the GTT either for the user or for scanout). Those VMA still need to 4964 * be unbound now. 4965 */ 4966 4967 spin_lock(&i915->mm.free_lock); 4968 while ((freed = llist_del_all(&i915->mm.free_list))) { 4969 spin_unlock(&i915->mm.free_lock); 4970 4971 __i915_gem_free_objects(i915, freed); 4972 if (need_resched()) 4973 return; 4974 4975 spin_lock(&i915->mm.free_lock); 4976 } 4977 spin_unlock(&i915->mm.free_lock); 4978 } 4979 4980 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4981 { 4982 struct drm_i915_gem_object *obj = 4983 container_of(head, typeof(*obj), rcu); 4984 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4985 4986 /* 4987 * We reuse obj->rcu for the freed list, so we had better not treat 4988 * it like a rcu_head from this point forwards. And we expect all 4989 * objects to be freed via this path. 4990 */ 4991 destroy_rcu_head(&obj->rcu); 4992 4993 /* 4994 * Since we require blocking on struct_mutex to unbind the freed 4995 * object from the GPU before releasing resources back to the 4996 * system, we cannot do that directly from the RCU callback (which may 4997 * be a softirq context), but must instead defer that work onto a 4998 * kthread. We use the RCU callback rather than move the freed object 4999 * directly onto the work queue so that we can mix between using the 5000 * worker and performing frees directly from subsequent allocations for 5001 * crude but effective memory throttling. 5002 */ 5003 if (llist_add(&obj->freed, &i915->mm.free_list)) 5004 queue_work(i915->wq, &i915->mm.free_work); 5005 } 5006 5007 void i915_gem_free_object(struct drm_gem_object *gem_obj) 5008 { 5009 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 5010 5011 if (obj->mm.quirked) 5012 __i915_gem_object_unpin_pages(obj); 5013 5014 if (discard_backing_storage(obj)) 5015 obj->mm.madv = I915_MADV_DONTNEED; 5016 5017 /* 5018 * Before we free the object, make sure any pure RCU-only 5019 * read-side critical sections are complete, e.g. 5020 * i915_gem_busy_ioctl(). For the corresponding synchronized 5021 * lookup see i915_gem_object_lookup_rcu(). 5022 */ 5023 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5024 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5025 } 5026 5027 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5028 { 5029 lockdep_assert_held(&obj->base.dev->struct_mutex); 5030 5031 if (!i915_gem_object_has_active_reference(obj) && 5032 i915_gem_object_is_active(obj)) 5033 i915_gem_object_set_active_reference(obj); 5034 else 5035 i915_gem_object_put(obj); 5036 } 5037 5038 void i915_gem_sanitize(struct drm_i915_private *i915) 5039 { 5040 int err; 5041 5042 GEM_TRACE("\n"); 5043 5044 mutex_lock(&i915->drm.struct_mutex); 5045 5046 intel_runtime_pm_get(i915); 5047 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5048 5049 /* 5050 * As we have just resumed the machine and woken the device up from 5051 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5052 * back to defaults, recovering from whatever wedged state we left it 5053 * in and so worth trying to use the device once more.
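 *
 * In outline, the recovery below: clear any stale terminally-wedged
 * status, then, where intel_has_gpu_reset() reports a reset is
 * available (gen5+), perform a full GPU reset and re-sanitize the
 * engine state.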
5054 */ 5055 if (i915_terminally_wedged(&i915->gpu_error)) 5056 i915_gem_unset_wedged(i915); 5057 5058 /* 5059 * If we inherit context state from the BIOS or earlier occupants 5060 * of the GPU, the GPU may be in an inconsistent state when we 5061 * try to take over. The only way to remove the earlier state 5062 * is by resetting. However, resetting on earlier gens is tricky as 5063 * it may impact the display and we are uncertain about the stability 5064 * of the reset, so we only attempt it on gen5 and newer; in principle this could be extended to even earlier gens. 5065 */ 5066 err = -ENODEV; 5067 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5068 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5069 if (!err) 5070 intel_engines_sanitize(i915); 5071 5072 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5073 intel_runtime_pm_put(i915); 5074 5075 i915_gem_contexts_lost(i915); 5076 mutex_unlock(&i915->drm.struct_mutex); 5077 } 5078 5079 int i915_gem_suspend(struct drm_i915_private *i915) 5080 { 5081 int ret; 5082 5083 GEM_TRACE("\n"); 5084 5085 intel_runtime_pm_get(i915); 5086 intel_suspend_gt_powersave(i915); 5087 5088 mutex_lock(&i915->drm.struct_mutex); 5089 5090 /* 5091 * We have to flush all the executing contexts to main memory so 5092 * that they can be saved in the hibernation image. To ensure the last 5093 * context image is coherent, we have to switch away from it. That 5094 * leaves the i915->kernel_context still active when 5095 * we actually suspend, and its image in memory may not match the GPU 5096 * state. Fortunately, the kernel_context is disposable and we do 5097 * not rely on its state. 5098 */ 5099 if (!i915_terminally_wedged(&i915->gpu_error)) { 5100 ret = i915_gem_switch_to_kernel_context(i915); 5101 if (ret) 5102 goto err_unlock; 5103 5104 ret = i915_gem_wait_for_idle(i915, 5105 I915_WAIT_INTERRUPTIBLE | 5106 I915_WAIT_LOCKED | 5107 I915_WAIT_FOR_IDLE_BOOST, 5108 MAX_SCHEDULE_TIMEOUT); 5109 if (ret && ret != -EIO) 5110 goto err_unlock; 5111 5112 assert_kernel_context_is_current(i915); 5113 } 5114 i915_retire_requests(i915); /* ensure we flush after wedging */ 5115 5116 mutex_unlock(&i915->drm.struct_mutex); 5117 5118 intel_uc_suspend(i915); 5119 5120 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5121 cancel_delayed_work_sync(&i915->gt.retire_work); 5122 5123 /* 5124 * As the idle_work will rearm itself if it detects a race, play safe and 5125 * repeat the flush until it is definitely idle. 5126 */ 5127 drain_delayed_work(&i915->gt.idle_work); 5128 5129 /* 5130 * Assert that we successfully flushed all the work and 5131 * reset the GPU back to its idle, low power state. 5132 */ 5133 WARN_ON(i915->gt.awake); 5134 if (WARN_ON(!intel_engines_are_idle(i915))) 5135 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5136 5137 intel_runtime_pm_put(i915); 5138 return 0; 5139 5140 err_unlock: 5141 mutex_unlock(&i915->drm.struct_mutex); 5142 intel_runtime_pm_put(i915); 5143 return ret; 5144 } 5145 5146 void i915_gem_suspend_late(struct drm_i915_private *i915) 5147 { 5148 struct drm_i915_gem_object *obj; 5149 struct list_head *phases[] = { 5150 &i915->mm.unbound_list, 5151 &i915->mm.bound_list, 5152 NULL 5153 }, **phase; 5154 5155 /* 5156 * Neither the BIOS, ourselves nor any other kernel 5157 * expects the system to be in execlists mode on startup, 5158 * so we need to reset the GPU back to legacy mode. And the only 5159 * known way to disable logical contexts is through a GPU reset.
5160 * 5161 * So in order to leave the system in a known default configuration, 5162 * always reset the GPU upon unload and suspend. Afterwards we then 5163 * clean up the GEM state tracking, flushing off the requests and 5164 * leaving the system in a known idle state. 5165 * 5166 * Note that it is of the utmost importance that the GPU is idle and 5167 * all stray writes are flushed *before* we dismantle the backing 5168 * storage for the pinned objects. 5169 * 5170 * However, since we are uncertain that resetting the GPU on older 5171 * machines is a good idea, we don't - just in case it leaves the 5172 * machine in an unusable condition. 5173 */ 5174 5175 mutex_lock(&i915->drm.struct_mutex); 5176 for (phase = phases; *phase; phase++) { 5177 list_for_each_entry(obj, *phase, mm.link) 5178 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5179 } 5180 mutex_unlock(&i915->drm.struct_mutex); 5181 5182 intel_uc_sanitize(i915); 5183 i915_gem_sanitize(i915); 5184 } 5185 5186 void i915_gem_resume(struct drm_i915_private *i915) 5187 { 5188 GEM_TRACE("\n"); 5189 5190 WARN_ON(i915->gt.awake); 5191 5192 mutex_lock(&i915->drm.struct_mutex); 5193 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5194 5195 i915_gem_restore_gtt_mappings(i915); 5196 i915_gem_restore_fences(i915); 5197 5198 /* 5199 * As we didn't flush the kernel context before suspend, we cannot 5200 * guarantee that the context image is complete. So let's just reset 5201 * it and start again. 5202 */ 5203 i915->gt.resume(i915); 5204 5205 if (i915_gem_init_hw(i915)) 5206 goto err_wedged; 5207 5208 intel_uc_resume(i915); 5209 5210 /* Always reload a context for powersaving. */ 5211 if (i915_gem_switch_to_kernel_context(i915)) 5212 goto err_wedged; 5213 5214 out_unlock: 5215 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5216 mutex_unlock(&i915->drm.struct_mutex); 5217 return; 5218 5219 err_wedged: 5220 if (!i915_terminally_wedged(&i915->gpu_error)) { 5221 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5222 i915_gem_set_wedged(i915); 5223 } 5224 goto out_unlock; 5225 } 5226 5227 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5228 { 5229 if (INTEL_GEN(dev_priv) < 5 || 5230 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5231 return; 5232 5233 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5234 DISP_TILE_SURFACE_SWIZZLING); 5235 5236 if (IS_GEN5(dev_priv)) 5237 return; 5238 5239 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5240 if (IS_GEN6(dev_priv)) 5241 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5242 else if (IS_GEN7(dev_priv)) 5243 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5244 else if (IS_GEN8(dev_priv)) 5245 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5246 else 5247 BUG(); 5248 } 5249 5250 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5251 { 5252 I915_WRITE(RING_CTL(base), 0); 5253 I915_WRITE(RING_HEAD(base), 0); 5254 I915_WRITE(RING_TAIL(base), 0); 5255 I915_WRITE(RING_START(base), 0); 5256 } 5257 5258 static void init_unused_rings(struct drm_i915_private *dev_priv) 5259 { 5260 if (IS_I830(dev_priv)) { 5261 init_unused_ring(dev_priv, PRB1_BASE); 5262 init_unused_ring(dev_priv, SRB0_BASE); 5263 init_unused_ring(dev_priv, SRB1_BASE); 5264 init_unused_ring(dev_priv, SRB2_BASE); 5265 init_unused_ring(dev_priv, SRB3_BASE); 5266 } else if (IS_GEN2(dev_priv)) { 5267 init_unused_ring(dev_priv, SRB0_BASE); 5268 init_unused_ring(dev_priv, SRB1_BASE); 5269 } else if (IS_GEN3(dev_priv)) { 5270
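		/* gen3 also has the PRB1 and PRB2 rings to quiesce */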
init_unused_ring(dev_priv, PRB1_BASE); 5271 init_unused_ring(dev_priv, PRB2_BASE); 5272 } 5273 } 5274 5275 static int __i915_gem_restart_engines(void *data) 5276 { 5277 struct drm_i915_private *i915 = data; 5278 struct intel_engine_cs *engine; 5279 enum intel_engine_id id; 5280 int err; 5281 5282 for_each_engine(engine, i915, id) { 5283 err = engine->init_hw(engine); 5284 if (err) { 5285 DRM_ERROR("Failed to restart %s (%d)\n", 5286 engine->name, err); 5287 return err; 5288 } 5289 } 5290 5291 return 0; 5292 } 5293 5294 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5295 { 5296 int ret; 5297 5298 dev_priv->gt.last_init_time = ktime_get(); 5299 5300 /* Double layer security blanket, see i915_gem_init() */ 5301 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5302 5303 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5304 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5305 5306 if (IS_HASWELL(dev_priv)) 5307 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5308 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5309 5310 /* Apply the GT workarounds... */ 5311 intel_gt_apply_workarounds(dev_priv); 5312 /* ...and determine whether they are sticking. */ 5313 intel_gt_verify_workarounds(dev_priv, "init"); 5314 5315 i915_gem_init_swizzling(dev_priv); 5316 5317 /* 5318 * At least 830 can leave some of the unused rings 5319 * "active" (i.e. head != tail) after resume which 5320 * will prevent c3 entry. Make sure all unused rings 5321 * are totally idle. 5322 */ 5323 init_unused_rings(dev_priv); 5324 5325 BUG_ON(!dev_priv->kernel_context); 5326 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5327 ret = -EIO; 5328 goto out; 5329 } 5330 5331 ret = i915_ppgtt_init_hw(dev_priv); 5332 if (ret) { 5333 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5334 goto out; 5335 } 5336 5337 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5338 if (ret) { 5339 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5340 goto out; 5341 } 5342 5343 /* We can't enable contexts until all firmware is loaded */ 5344 ret = intel_uc_init_hw(dev_priv); 5345 if (ret) { 5346 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5347 goto out; 5348 } 5349 5350 intel_mocs_init_l3cc_table(dev_priv); 5351 5352 /* Only when the HW is re-initialised, can we replay the requests */ 5353 ret = __i915_gem_restart_engines(dev_priv); 5354 if (ret) 5355 goto cleanup_uc; 5356 5357 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5358 5359 return 0; 5360 5361 cleanup_uc: 5362 intel_uc_fini_hw(dev_priv); 5363 out: 5364 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5365 5366 return ret; 5367 } 5368 5369 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5370 { 5371 struct i915_gem_context *ctx; 5372 struct intel_engine_cs *engine; 5373 enum intel_engine_id id; 5374 int err; 5375 5376 /* 5377 * As we reset the gpu during very early sanitisation, the current 5378 * register state on the GPU should reflect its default values. 5379 * We load a context onto the hw (with restore-inhibit), then switch 5380 * over to a second context to save that default register state. We 5381 * can then prime every new context with that state so they all start 5382 * from the same default HW values.
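 *
 * In outline, the steps below: create a kernel context, submit an
 * empty request (running engine->init_context where present) on every
 * engine, switch back to i915->kernel_context and wait for idle, then
 * unbind each context image and keep it as engine->default_state.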
5383 */ 5384 5385 ctx = i915_gem_context_create_kernel(i915, 0); 5386 if (IS_ERR(ctx)) 5387 return PTR_ERR(ctx); 5388 5389 for_each_engine(engine, i915, id) { 5390 struct i915_request *rq; 5391 5392 rq = i915_request_alloc(engine, ctx); 5393 if (IS_ERR(rq)) { 5394 err = PTR_ERR(rq); 5395 goto out_ctx; 5396 } 5397 5398 err = 0; 5399 if (engine->init_context) 5400 err = engine->init_context(rq); 5401 5402 i915_request_add(rq); 5403 if (err) 5404 goto err_active; 5405 } 5406 5407 err = i915_gem_switch_to_kernel_context(i915); 5408 if (err) 5409 goto err_active; 5410 5411 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5412 i915_gem_set_wedged(i915); 5413 err = -EIO; /* Caller will declare us wedged */ 5414 goto err_active; 5415 } 5416 5417 assert_kernel_context_is_current(i915); 5418 5419 /* 5420 * Immediately park the GPU so that we enable powersaving and 5421 * treat it as idle. The next time we issue a request, we will 5422 * unpark and start using the engine->pinned_default_state, otherwise 5423 * it is in limbo and an early reset may fail. 5424 */ 5425 __i915_gem_park(i915); 5426 5427 for_each_engine(engine, i915, id) { 5428 struct i915_vma *state; 5429 void *vaddr; 5430 5431 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 5432 5433 state = to_intel_context(ctx, engine)->state; 5434 if (!state) 5435 continue; 5436 5437 /* 5438 * As we will hold a reference to the logical state, it will 5439 * not be torn down with the context, and importantly the 5440 * object will hold onto its vma (making it possible for a 5441 * stray GTT write to corrupt our defaults). Unmap the vma 5442 * from the GTT to prevent such accidents and reclaim the 5443 * space. 5444 */ 5445 err = i915_vma_unbind(state); 5446 if (err) 5447 goto err_active; 5448 5449 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5450 if (err) 5451 goto err_active; 5452 5453 engine->default_state = i915_gem_object_get(state->obj); 5454 5455 /* Check we can acquire the image of the context state */ 5456 vaddr = i915_gem_object_pin_map(engine->default_state, 5457 I915_MAP_FORCE_WB); 5458 if (IS_ERR(vaddr)) { 5459 err = PTR_ERR(vaddr); 5460 goto err_active; 5461 } 5462 5463 i915_gem_object_unpin_map(engine->default_state); 5464 } 5465 5466 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5467 unsigned int found = intel_engines_has_context_isolation(i915); 5468 5469 /* 5470 * Make sure that classes with multiple engine instances all 5471 * share the same basic configuration. 5472 */ 5473 for_each_engine(engine, i915, id) { 5474 unsigned int bit = BIT(engine->uabi_class); 5475 unsigned int expected = engine->default_state ? bit : 0; 5476 5477 if ((found & bit) != expected) { 5478 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5479 engine->uabi_class, engine->name); 5480 } 5481 } 5482 } 5483 5484 out_ctx: 5485 i915_gem_context_set_closed(ctx); 5486 i915_gem_context_put(ctx); 5487 return err; 5488 5489 err_active: 5490 /* 5491 * If we have to abandon now, we expect the engines to be idle 5492 * and ready to be torn-down. First try to flush any remaining 5493 * request, ensure we are pointing at the kernel context and 5494 * then remove it. 
5495 */ 5496 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5497 goto out_ctx; 5498 5499 if (WARN_ON(i915_gem_wait_for_idle(i915, 5500 I915_WAIT_LOCKED, 5501 MAX_SCHEDULE_TIMEOUT))) 5502 goto out_ctx; 5503 5504 i915_gem_contexts_lost(i915); 5505 goto out_ctx; 5506 } 5507 5508 static int 5509 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5510 { 5511 struct drm_i915_gem_object *obj; 5512 struct i915_vma *vma; 5513 int ret; 5514 5515 obj = i915_gem_object_create_stolen(i915, size); 5516 if (!obj) 5517 obj = i915_gem_object_create_internal(i915, size); 5518 if (IS_ERR(obj)) { 5519 DRM_ERROR("Failed to allocate scratch page\n"); 5520 return PTR_ERR(obj); 5521 } 5522 5523 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5524 if (IS_ERR(vma)) { 5525 ret = PTR_ERR(vma); 5526 goto err_unref; 5527 } 5528 5529 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5530 if (ret) 5531 goto err_unref; 5532 5533 i915->gt.scratch = vma; 5534 return 0; 5535 5536 err_unref: 5537 i915_gem_object_put(obj); 5538 return ret; 5539 } 5540 5541 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5542 { 5543 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5544 } 5545 5546 int i915_gem_init(struct drm_i915_private *dev_priv) 5547 { 5548 int ret; 5549 5550 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 5551 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5552 mkwrite_device_info(dev_priv)->page_sizes = 5553 I915_GTT_PAGE_SIZE_4K; 5554 5555 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5556 5557 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5558 dev_priv->gt.resume = intel_lr_context_resume; 5559 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5560 } else { 5561 dev_priv->gt.resume = intel_legacy_submission_resume; 5562 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5563 } 5564 5565 ret = i915_gem_init_userptr(dev_priv); 5566 if (ret) 5567 return ret; 5568 5569 ret = intel_uc_init_misc(dev_priv); 5570 if (ret) 5571 return ret; 5572 5573 ret = intel_wopcm_init(&dev_priv->wopcm); 5574 if (ret) 5575 goto err_uc_misc; 5576 5577 /* This is just a security blanket to placate dragons. 5578 * On some systems, we very sporadically observe that the first TLBs 5579 * used by the CS may be stale, despite us poking the TLB reset. If 5580 * we hold the forcewake during initialisation these problems 5581 * just magically go away. 5582 */ 5583 mutex_lock(&dev_priv->drm.struct_mutex); 5584 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5585 5586 ret = i915_gem_init_ggtt(dev_priv); 5587 if (ret) { 5588 GEM_BUG_ON(ret == -EIO); 5589 goto err_unlock; 5590 } 5591 5592 ret = i915_gem_init_scratch(dev_priv, 5593 IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); 5594 if (ret) { 5595 GEM_BUG_ON(ret == -EIO); 5596 goto err_ggtt; 5597 } 5598 5599 ret = i915_gem_contexts_init(dev_priv); 5600 if (ret) { 5601 GEM_BUG_ON(ret == -EIO); 5602 goto err_scratch; 5603 } 5604 5605 ret = intel_engines_init(dev_priv); 5606 if (ret) { 5607 GEM_BUG_ON(ret == -EIO); 5608 goto err_context; 5609 } 5610 5611 intel_init_gt_powersave(dev_priv); 5612 5613 ret = intel_uc_init(dev_priv); 5614 if (ret) 5615 goto err_pm; 5616 5617 ret = i915_gem_init_hw(dev_priv); 5618 if (ret) 5619 goto err_uc_init; 5620 5621 /* 5622 * Despite its name intel_init_clock_gating applies both display 5623 * clock gating workarounds; GT mmio workarounds and the occasional 5624 * GT power context workaround. 
Worse, sometimes it includes a context 5625 * register workaround which we need to apply before we record the 5626 * default HW state for all contexts. 5627 * 5628 * FIXME: break up the workarounds and apply them at the right time! 5629 */ 5630 intel_init_clock_gating(dev_priv); 5631 5632 ret = __intel_engines_record_defaults(dev_priv); 5633 if (ret) 5634 goto err_init_hw; 5635 5636 if (i915_inject_load_failure()) { 5637 ret = -ENODEV; 5638 goto err_init_hw; 5639 } 5640 5641 if (i915_inject_load_failure()) { 5642 ret = -EIO; 5643 goto err_init_hw; 5644 } 5645 5646 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5647 mutex_unlock(&dev_priv->drm.struct_mutex); 5648 5649 return 0; 5650 5651 /* 5652 * Unwinding is complicated by the fact that we want to handle -EIO to mean 5653 * disable GPU submission but keep KMS alive. We want to mark the 5654 * HW as irreversibly wedged, but keep enough state around that the 5655 * driver doesn't explode during runtime. 5656 */ 5657 err_init_hw: 5658 mutex_unlock(&dev_priv->drm.struct_mutex); 5659 5660 WARN_ON(i915_gem_suspend(dev_priv)); 5661 i915_gem_suspend_late(dev_priv); 5662 5663 i915_gem_drain_workqueue(dev_priv); 5664 5665 mutex_lock(&dev_priv->drm.struct_mutex); 5666 intel_uc_fini_hw(dev_priv); 5667 err_uc_init: 5668 intel_uc_fini(dev_priv); 5669 err_pm: 5670 if (ret != -EIO) { 5671 intel_cleanup_gt_powersave(dev_priv); 5672 i915_gem_cleanup_engines(dev_priv); 5673 } 5674 err_context: 5675 if (ret != -EIO) 5676 i915_gem_contexts_fini(dev_priv); 5677 err_scratch: 5678 i915_gem_fini_scratch(dev_priv); 5679 err_ggtt: 5680 err_unlock: 5681 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5682 mutex_unlock(&dev_priv->drm.struct_mutex); 5683 5684 err_uc_misc: 5685 intel_uc_fini_misc(dev_priv); 5686 5687 if (ret != -EIO) 5688 i915_gem_cleanup_userptr(dev_priv); 5689 5690 if (ret == -EIO) { 5691 mutex_lock(&dev_priv->drm.struct_mutex); 5692 5693 /* 5694 * Allow engine initialisation to fail by marking the GPU as 5695 * wedged. But we only want to do this where the GPU is angry, 5696 * for all other failures, such as an allocation failure, bail. 5697 */ 5698 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5699 i915_load_error(dev_priv, 5700 "Failed to initialize GPU, declaring it wedged!\n"); 5701 i915_gem_set_wedged(dev_priv); 5702 } 5703 5704 /* Minimal basic recovery for KMS */ 5705 ret = i915_ggtt_enable_hw(dev_priv); 5706 i915_gem_restore_gtt_mappings(dev_priv); 5707 i915_gem_restore_fences(dev_priv); 5708 intel_init_clock_gating(dev_priv); 5709 5710 mutex_unlock(&dev_priv->drm.struct_mutex); 5711 } 5712 5713 i915_gem_drain_freed_objects(dev_priv); 5714 return ret; 5715 } 5716 5717 void i915_gem_fini(struct drm_i915_private *dev_priv) 5718 { 5719 i915_gem_suspend_late(dev_priv); 5720 intel_disable_gt_powersave(dev_priv); 5721 5722 /* Flush any outstanding unpin_work.
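 * (i.e. let i915->wq drain, including any deferred object frees,
 * before we start pulling down the engines and contexts).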
*/ 5723 i915_gem_drain_workqueue(dev_priv); 5724 5725 mutex_lock(&dev_priv->drm.struct_mutex); 5726 intel_uc_fini_hw(dev_priv); 5727 intel_uc_fini(dev_priv); 5728 i915_gem_cleanup_engines(dev_priv); 5729 i915_gem_contexts_fini(dev_priv); 5730 i915_gem_fini_scratch(dev_priv); 5731 mutex_unlock(&dev_priv->drm.struct_mutex); 5732 5733 intel_wa_list_free(&dev_priv->gt_wa_list); 5734 5735 intel_cleanup_gt_powersave(dev_priv); 5736 5737 intel_uc_fini_misc(dev_priv); 5738 i915_gem_cleanup_userptr(dev_priv); 5739 5740 i915_gem_drain_freed_objects(dev_priv); 5741 5742 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5743 } 5744 5745 void i915_gem_init_mmio(struct drm_i915_private *i915) 5746 { 5747 i915_gem_sanitize(i915); 5748 } 5749 5750 void 5751 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5752 { 5753 struct intel_engine_cs *engine; 5754 enum intel_engine_id id; 5755 5756 for_each_engine(engine, dev_priv, id) 5757 dev_priv->gt.cleanup_engine(engine); 5758 } 5759 5760 void 5761 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5762 { 5763 int i; 5764 5765 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5766 !IS_CHERRYVIEW(dev_priv)) 5767 dev_priv->num_fence_regs = 32; 5768 else if (INTEL_GEN(dev_priv) >= 4 || 5769 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5770 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5771 dev_priv->num_fence_regs = 16; 5772 else 5773 dev_priv->num_fence_regs = 8; 5774 5775 if (intel_vgpu_active(dev_priv)) 5776 dev_priv->num_fence_regs = 5777 I915_READ(vgtif_reg(avail_rs.fence_num)); 5778 5779 /* Initialize fence registers to zero */ 5780 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5781 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5782 5783 fence->i915 = dev_priv; 5784 fence->id = i; 5785 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5786 } 5787 i915_gem_restore_fences(dev_priv); 5788 5789 i915_gem_detect_bit_6_swizzle(dev_priv); 5790 } 5791 5792 static void i915_gem_init__mm(struct drm_i915_private *i915) 5793 { 5794 spin_lock_init(&i915->mm.object_stat_lock); 5795 spin_lock_init(&i915->mm.obj_lock); 5796 spin_lock_init(&i915->mm.free_lock); 5797 5798 init_llist_head(&i915->mm.free_list); 5799 5800 INIT_LIST_HEAD(&i915->mm.unbound_list); 5801 INIT_LIST_HEAD(&i915->mm.bound_list); 5802 INIT_LIST_HEAD(&i915->mm.fence_list); 5803 INIT_LIST_HEAD(&i915->mm.userfault_list); 5804 5805 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5806 } 5807 5808 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5809 { 5810 int err = -ENOMEM; 5811 5812 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5813 if (!dev_priv->objects) 5814 goto err_out; 5815 5816 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5817 if (!dev_priv->vmas) 5818 goto err_objects; 5819 5820 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5821 if (!dev_priv->luts) 5822 goto err_vmas; 5823 5824 dev_priv->requests = KMEM_CACHE(i915_request, 5825 SLAB_HWCACHE_ALIGN | 5826 SLAB_RECLAIM_ACCOUNT | 5827 SLAB_TYPESAFE_BY_RCU); 5828 if (!dev_priv->requests) 5829 goto err_luts; 5830 5831 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5832 SLAB_HWCACHE_ALIGN | 5833 SLAB_RECLAIM_ACCOUNT); 5834 if (!dev_priv->dependencies) 5835 goto err_requests; 5836 5837 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5838 if (!dev_priv->priorities) 5839 goto err_dependencies; 5840 5841 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5842 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5843 

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.object_stat_lock);
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);
	INIT_LIST_HEAD(&i915->mm.userfault_list);

	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	dev_priv->requests = KMEM_CACHE(i915_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;

	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);

	return 0;

err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}
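
/*
 * Illustrative sketch of the assumed wiring (see i915_drv.c for the real
 * table): the two hooks above are intended to be called from the driver's
 * hibernation callbacks, roughly:
 *
 *	static const struct dev_pm_ops i915_pm_ops = {
 *		...
 *		.freeze = i915_pm_freeze,		// calls i915_gem_freeze()
 *		.freeze_late = i915_pm_freeze_late,	// calls i915_gem_freeze_late()
 *		...
 *	};
 *
 * i915_gem_freeze() discards purgeable objects early in the freeze
 * sequence; i915_gem_freeze_late() runs just before the hibernation image
 * is written and moves the remaining objects to the CPU domain.
 */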

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
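
/*
 * Illustrative use (a simplified sketch of the display code, not a
 * verbatim call site): when a plane flips from one framebuffer to
 * another, the plane's frontbuffer bit migrates from the old backing
 * object to the new one:
 *
 *	i915_gem_track_fb(intel_fb_obj(old_state->fb),
 *			  intel_fb_obj(new_state->fb),
 *			  plane->frontbuffer_bit);
 *
 * Either object may be NULL (plane being enabled or disabled); the
 * atomics above keep each object's bitmask consistent even though
 * different planes update their own bits concurrently.
 */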

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
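
/*
 * Illustrative use (sketch): a one-shot upload of a firmware blob into a
 * GEM object, along the lines of what the uC firmware loader does:
 *
 *	const struct firmware *fw = ...;	// from request_firmware()
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The helper rounds the allocation up to a whole page and writes through
 * the shmem pagecache, so the new object is returned in the CPU write
 * domain with the data already resident.
 */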

struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
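
/*
 * Illustrative use of the lookup helpers above (sketch, error handling
 * mostly omitted): with the object's pages pinned, a caller can patch
 * individual pages on the CPU or fetch per-page DMA addresses:
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	page = i915_gem_object_get_dirty_page(obj, n);
 *	vaddr = kmap_atomic(page);
 *	... CPU writes to vaddr ...
 *	kunmap_atomic(vaddr);
 *
 *	addr = i915_gem_object_get_dma_address(obj, n);
 *
 *	i915_gem_object_unpin_pages(obj);
 *
 * The radixtree cache built by i915_gem_object_get_sg() keeps the
 * repeated per-page lookups cheap even for large objects.
 */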

int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif