/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->base.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size, allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}

static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct drm_i915_gem_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_gem_request_completed(rq))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client && !i915_gem_request_started(rq)) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
	}

	timeout = i915_wait_request(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
		i915_gem_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0) {
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);
		prune_fences = timeout >= 0;
	}

	dma_fence_put(excl);

	/* Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence, int prio)
{
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;
	if (!engine->schedule)
		return;

	engine->schedule(rq, prio);
}

static void fence_set_priority(struct dma_fence *fence, int prio)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], prio);
	} else {
		__fence_set_priority(fence, prio);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      int prio)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], prio);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, prio);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       args->size, &args->handle);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	intel_runtime_pm_get(dev_priv);
	spin_lock_irq(&dev_priv->uncore.lock);

	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

	spin_unlock_irq(&dev_priv->uncore.lock);
	intel_runtime_pm_put(dev_priv);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->base.write_domain & flush_domains))
		return;

	switch (obj->base.write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->base.write_domain = 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_is_active(vma))
			continue;

		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915 = to_i915(obj->base.dev);
	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
 *     aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others.
 *    An attempt to service a GTT page fault from a snoopable object will
 *    generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
int i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
	struct i915_vma *vma;
	pgoff_t page_offset;
	unsigned int flags;
	int ret;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon acquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_rpm;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
		ret = -EFAULT;
		goto err_unlock;
	}

	/* If the object is smaller than a couple of partial vma, it is
	 * not worth only creating a single partial vma - we may as well
	 * clear enough space for the full object.
	 */
	flags = PIN_MAPPABLE;
	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
		flags |= PIN_NONBLOCK | PIN_NONFAULT;

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);

		/* Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */
		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto err_unpin;

	ret = i915_vma_pin_fence(vma);
	if (ret)
		goto err_unpin;

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->iomap);
	if (ret)
		goto err_fence;

	/* Mark as being mmapped into userspace for later revocation */
	assert_rpm_wakelock_held(dev_priv);
	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
	GEM_BUG_ON(!obj->userfault_count);

	i915_vma_set_ggtt_write(vma);

err_fence:
	i915_vma_unpin_fence(vma);
err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_rpm:
	intel_runtime_pm_put(dev_priv);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			ret = VM_FAULT_SIGBUS;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
2018 */ 2019 ret = VM_FAULT_NOPAGE; 2020 break; 2021 case -ENOMEM: 2022 ret = VM_FAULT_OOM; 2023 break; 2024 case -ENOSPC: 2025 case -EFAULT: 2026 ret = VM_FAULT_SIGBUS; 2027 break; 2028 default: 2029 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2030 ret = VM_FAULT_SIGBUS; 2031 break; 2032 } 2033 return ret; 2034 } 2035 2036 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2037 { 2038 struct i915_vma *vma; 2039 2040 GEM_BUG_ON(!obj->userfault_count); 2041 2042 obj->userfault_count = 0; 2043 list_del(&obj->userfault_link); 2044 drm_vma_node_unmap(&obj->base.vma_node, 2045 obj->base.dev->anon_inode->i_mapping); 2046 2047 for_each_ggtt_vma(vma, obj) 2048 i915_vma_unset_userfault(vma); 2049 } 2050 2051 /** 2052 * i915_gem_release_mmap - remove physical page mappings 2053 * @obj: obj in question 2054 * 2055 * Preserve the reservation of the mmapping with the DRM core code, but 2056 * relinquish ownership of the pages back to the system. 2057 * 2058 * It is vital that we remove the page mapping if we have mapped a tiled 2059 * object through the GTT and then lose the fence register due to 2060 * resource pressure. Similarly if the object has been moved out of the 2061 * aperture, than pages mapped into userspace must be revoked. Removing the 2062 * mapping will then trigger a page fault on the next user access, allowing 2063 * fixup by i915_gem_fault(). 2064 */ 2065 void 2066 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2067 { 2068 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2069 2070 /* Serialisation between user GTT access and our code depends upon 2071 * revoking the CPU's PTE whilst the mutex is held. The next user 2072 * pagefault then has to wait until we release the mutex. 2073 * 2074 * Note that RPM complicates somewhat by adding an additional 2075 * requirement that operations to the GGTT be made holding the RPM 2076 * wakeref. 2077 */ 2078 lockdep_assert_held(&i915->drm.struct_mutex); 2079 intel_runtime_pm_get(i915); 2080 2081 if (!obj->userfault_count) 2082 goto out; 2083 2084 __i915_gem_object_release_mmap(obj); 2085 2086 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2087 * memory transactions from userspace before we return. The TLB 2088 * flushing implied above by changing the PTE above *should* be 2089 * sufficient, an extra barrier here just provides us with a bit 2090 * of paranoid documentation about our requirement to serialise 2091 * memory writes before touching registers / GSM. 2092 */ 2093 wmb(); 2094 2095 out: 2096 intel_runtime_pm_put(i915); 2097 } 2098 2099 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2100 { 2101 struct drm_i915_gem_object *obj, *on; 2102 int i; 2103 2104 /* 2105 * Only called during RPM suspend. All users of the userfault_list 2106 * must be holding an RPM wakeref to ensure that this can not 2107 * run concurrently with themselves (and use the struct_mutex for 2108 * protection between themselves). 2109 */ 2110 2111 list_for_each_entry_safe(obj, on, 2112 &dev_priv->mm.userfault_list, userfault_link) 2113 __i915_gem_object_release_mmap(obj); 2114 2115 /* The fence will be lost when the device powers down. If any were 2116 * in use by hardware (i.e. they are pinned), we should not be powering 2117 * down! All other fences will be reacquired by the user upon waking. 
2118 */ 2119 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2120 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2121 2122 /* Ideally we want to assert that the fence register is not 2123 * live at this point (i.e. that no piece of code will be 2124 * trying to write through fence + GTT, as that both violates 2125 * our tracking of activity and associated locking/barriers, 2126 * but also is illegal given that the hw is powered down). 2127 * 2128 * Previously we used reg->pin_count as a "liveness" indicator. 2129 * That is not sufficient, and we need a more fine-grained 2130 * tool if we want to have a sanity check here. 2131 */ 2132 2133 if (!reg->vma) 2134 continue; 2135 2136 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2137 reg->dirty = true; 2138 } 2139 } 2140 2141 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2142 { 2143 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2144 int err; 2145 2146 err = drm_gem_create_mmap_offset(&obj->base); 2147 if (likely(!err)) 2148 return 0; 2149 2150 /* Attempt to reap some mmap space from dead objects */ 2151 do { 2152 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2153 if (err) 2154 break; 2155 2156 i915_gem_drain_freed_objects(dev_priv); 2157 err = drm_gem_create_mmap_offset(&obj->base); 2158 if (!err) 2159 break; 2160 2161 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2162 2163 return err; 2164 } 2165 2166 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2167 { 2168 drm_gem_free_mmap_offset(&obj->base); 2169 } 2170 2171 int 2172 i915_gem_mmap_gtt(struct drm_file *file, 2173 struct drm_device *dev, 2174 uint32_t handle, 2175 uint64_t *offset) 2176 { 2177 struct drm_i915_gem_object *obj; 2178 int ret; 2179 2180 obj = i915_gem_object_lookup(file, handle); 2181 if (!obj) 2182 return -ENOENT; 2183 2184 ret = i915_gem_object_create_mmap_offset(obj); 2185 if (ret == 0) 2186 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2187 2188 i915_gem_object_put(obj); 2189 return ret; 2190 } 2191 2192 /** 2193 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2194 * @dev: DRM device 2195 * @data: GTT mapping ioctl data 2196 * @file: GEM object info 2197 * 2198 * Simply returns the fake offset to userspace so it can mmap it. 2199 * The mmap call will end up in drm_gem_mmap(), which will set things 2200 * up so we can get faults in the handler above. 2201 * 2202 * The fault handler will take care of binding the object into the GTT 2203 * (since it may have been evicted to make room for something), allocating 2204 * a fence register, and mapping the appropriate aperture address into 2205 * userspace. 2206 */ 2207 int 2208 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2209 struct drm_file *file) 2210 { 2211 struct drm_i915_gem_mmap_gtt *args = data; 2212 2213 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2214 } 2215 2216 /* Immediately discard the backing storage */ 2217 static void 2218 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2219 { 2220 i915_gem_object_free_mmap_offset(obj); 2221 2222 if (obj->base.filp == NULL) 2223 return; 2224 2225 /* Our goal here is to return as much of the memory as 2226 * is possible back to the system as we are called from OOM. 2227 * To do this we must instruct the shmfs to drop all of its 2228 * backing pages, *now*. 
2229 */ 2230 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2231 obj->mm.madv = __I915_MADV_PURGED; 2232 obj->mm.pages = ERR_PTR(-EFAULT); 2233 } 2234 2235 /* Try to discard unwanted pages */ 2236 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2237 { 2238 struct address_space *mapping; 2239 2240 lockdep_assert_held(&obj->mm.lock); 2241 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2242 2243 switch (obj->mm.madv) { 2244 case I915_MADV_DONTNEED: 2245 i915_gem_object_truncate(obj); 2246 case __I915_MADV_PURGED: 2247 return; 2248 } 2249 2250 if (obj->base.filp == NULL) 2251 return; 2252 2253 mapping = obj->base.filp->f_mapping, 2254 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2255 } 2256 2257 static void 2258 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2259 struct sg_table *pages) 2260 { 2261 struct sgt_iter sgt_iter; 2262 struct page *page; 2263 2264 __i915_gem_object_release_shmem(obj, pages, true); 2265 2266 i915_gem_gtt_finish_pages(obj, pages); 2267 2268 if (i915_gem_object_needs_bit17_swizzle(obj)) 2269 i915_gem_object_save_bit_17_swizzle(obj, pages); 2270 2271 for_each_sgt_page(page, sgt_iter, pages) { 2272 if (obj->mm.dirty) 2273 set_page_dirty(page); 2274 2275 if (obj->mm.madv == I915_MADV_WILLNEED) 2276 mark_page_accessed(page); 2277 2278 put_page(page); 2279 } 2280 obj->mm.dirty = false; 2281 2282 sg_free_table(pages); 2283 kfree(pages); 2284 } 2285 2286 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2287 { 2288 struct radix_tree_iter iter; 2289 void __rcu **slot; 2290 2291 rcu_read_lock(); 2292 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2293 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2294 rcu_read_unlock(); 2295 } 2296 2297 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2298 enum i915_mm_subclass subclass) 2299 { 2300 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2301 struct sg_table *pages; 2302 2303 if (i915_gem_object_has_pinned_pages(obj)) 2304 return; 2305 2306 GEM_BUG_ON(obj->bind_count); 2307 if (!i915_gem_object_has_pages(obj)) 2308 return; 2309 2310 /* May be called by shrinker from within get_pages() (on another bo) */ 2311 mutex_lock_nested(&obj->mm.lock, subclass); 2312 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2313 goto unlock; 2314 2315 /* ->put_pages might need to allocate memory for the bit17 swizzle 2316 * array, hence protect them from being reaped by removing them from gtt 2317 * lists early. 
*/ 2318 pages = fetch_and_zero(&obj->mm.pages); 2319 GEM_BUG_ON(!pages); 2320 2321 spin_lock(&i915->mm.obj_lock); 2322 list_del(&obj->mm.link); 2323 spin_unlock(&i915->mm.obj_lock); 2324 2325 if (obj->mm.mapping) { 2326 void *ptr; 2327 2328 ptr = page_mask_bits(obj->mm.mapping); 2329 if (is_vmalloc_addr(ptr)) 2330 vunmap(ptr); 2331 else 2332 kunmap(kmap_to_page(ptr)); 2333 2334 obj->mm.mapping = NULL; 2335 } 2336 2337 __i915_gem_object_reset_page_iter(obj); 2338 2339 if (!IS_ERR(pages)) 2340 obj->ops->put_pages(obj, pages); 2341 2342 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2343 2344 unlock: 2345 mutex_unlock(&obj->mm.lock); 2346 } 2347 2348 static bool i915_sg_trim(struct sg_table *orig_st) 2349 { 2350 struct sg_table new_st; 2351 struct scatterlist *sg, *new_sg; 2352 unsigned int i; 2353 2354 if (orig_st->nents == orig_st->orig_nents) 2355 return false; 2356 2357 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2358 return false; 2359 2360 new_sg = new_st.sgl; 2361 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2362 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2363 /* called before being DMA mapped, no need to copy sg->dma_* */ 2364 new_sg = sg_next(new_sg); 2365 } 2366 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2367 2368 sg_free_table(orig_st); 2369 2370 *orig_st = new_st; 2371 return true; 2372 } 2373 2374 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2375 { 2376 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2377 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2378 unsigned long i; 2379 struct address_space *mapping; 2380 struct sg_table *st; 2381 struct scatterlist *sg; 2382 struct sgt_iter sgt_iter; 2383 struct page *page; 2384 unsigned long last_pfn = 0; /* suppress gcc warning */ 2385 unsigned int max_segment = i915_sg_segment_size(); 2386 unsigned int sg_page_sizes; 2387 gfp_t noreclaim; 2388 int ret; 2389 2390 /* Assert that the object is not currently in any GPU domain. As it 2391 * wasn't in the GTT, there shouldn't be any way it could have been in 2392 * a GPU cache 2393 */ 2394 GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2395 GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2396 2397 st = kmalloc(sizeof(*st), GFP_KERNEL); 2398 if (st == NULL) 2399 return -ENOMEM; 2400 2401 rebuild_st: 2402 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2403 kfree(st); 2404 return -ENOMEM; 2405 } 2406 2407 /* Get the list of pages out of our struct file. They'll be pinned 2408 * at this point until we release them. 2409 * 2410 * Fail silently without starting the shrinker 2411 */ 2412 mapping = obj->base.filp->f_mapping; 2413 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2414 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2415 2416 sg = st->sgl; 2417 st->nents = 0; 2418 sg_page_sizes = 0; 2419 for (i = 0; i < page_count; i++) { 2420 const unsigned int shrink[] = { 2421 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2422 0, 2423 }, *s = shrink; 2424 gfp_t gfp = noreclaim; 2425 2426 do { 2427 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2428 if (likely(!IS_ERR(page))) 2429 break; 2430 2431 if (!*s) { 2432 ret = PTR_ERR(page); 2433 goto err_sg; 2434 } 2435 2436 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2437 cond_resched(); 2438 2439 /* We've tried hard to allocate the memory by reaping 2440 * our own buffer, now let the real VM do its job and 2441 * go down in flames if truly OOM. 
2442 * 2443 * However, since graphics tend to be disposable, 2444 * defer the oom here by reporting the ENOMEM back 2445 * to userspace. 2446 */ 2447 if (!*s) { 2448 /* reclaim and warn, but no oom */ 2449 gfp = mapping_gfp_mask(mapping); 2450 2451 /* Our bo are always dirty and so we require 2452 * kswapd to reclaim our pages (direct reclaim 2453 * does not effectively begin pageout of our 2454 * buffers on its own). However, direct reclaim 2455 * only waits for kswapd when under allocation 2456 * congestion. So as a result __GFP_RECLAIM is 2457 * unreliable and fails to actually reclaim our 2458 * dirty pages -- unless you try over and over 2459 * again with !__GFP_NORETRY. However, we still 2460 * want to fail this allocation rather than 2461 * trigger the out-of-memory killer and for 2462 * this we want __GFP_RETRY_MAYFAIL. 2463 */ 2464 gfp |= __GFP_RETRY_MAYFAIL; 2465 } 2466 } while (1); 2467 2468 if (!i || 2469 sg->length >= max_segment || 2470 page_to_pfn(page) != last_pfn + 1) { 2471 if (i) { 2472 sg_page_sizes |= sg->length; 2473 sg = sg_next(sg); 2474 } 2475 st->nents++; 2476 sg_set_page(sg, page, PAGE_SIZE, 0); 2477 } else { 2478 sg->length += PAGE_SIZE; 2479 } 2480 last_pfn = page_to_pfn(page); 2481 2482 /* Check that the i965g/gm workaround works. */ 2483 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2484 } 2485 if (sg) { /* loop terminated early; short sg table */ 2486 sg_page_sizes |= sg->length; 2487 sg_mark_end(sg); 2488 } 2489 2490 /* Trim unused sg entries to avoid wasting memory. */ 2491 i915_sg_trim(st); 2492 2493 ret = i915_gem_gtt_prepare_pages(obj, st); 2494 if (ret) { 2495 /* DMA remapping failed? One possible cause is that 2496 * it could not reserve enough large entries, asking 2497 * for PAGE_SIZE chunks instead may be helpful. 2498 */ 2499 if (max_segment > PAGE_SIZE) { 2500 for_each_sgt_page(page, sgt_iter, st) 2501 put_page(page); 2502 sg_free_table(st); 2503 2504 max_segment = PAGE_SIZE; 2505 goto rebuild_st; 2506 } else { 2507 dev_warn(&dev_priv->drm.pdev->dev, 2508 "Failed to DMA remap %lu pages\n", 2509 page_count); 2510 goto err_pages; 2511 } 2512 } 2513 2514 if (i915_gem_object_needs_bit17_swizzle(obj)) 2515 i915_gem_object_do_bit_17_swizzle(obj, st); 2516 2517 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2518 2519 return 0; 2520 2521 err_sg: 2522 sg_mark_end(sg); 2523 err_pages: 2524 for_each_sgt_page(page, sgt_iter, st) 2525 put_page(page); 2526 sg_free_table(st); 2527 kfree(st); 2528 2529 /* shmemfs first checks if there is enough memory to allocate the page 2530 * and reports ENOSPC should there be insufficient, along with the usual 2531 * ENOMEM for a genuine allocation failure. 2532 * 2533 * We use ENOSPC in our driver to mean that we have run out of aperture 2534 * space and so want to translate the error from shmemfs back to our 2535 * usual understanding of ENOMEM. 
2536 */ 2537 if (ret == -ENOSPC) 2538 ret = -ENOMEM; 2539 2540 return ret; 2541 } 2542 2543 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2544 struct sg_table *pages, 2545 unsigned int sg_page_sizes) 2546 { 2547 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2548 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2549 int i; 2550 2551 lockdep_assert_held(&obj->mm.lock); 2552 2553 obj->mm.get_page.sg_pos = pages->sgl; 2554 obj->mm.get_page.sg_idx = 0; 2555 2556 obj->mm.pages = pages; 2557 2558 if (i915_gem_object_is_tiled(obj) && 2559 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2560 GEM_BUG_ON(obj->mm.quirked); 2561 __i915_gem_object_pin_pages(obj); 2562 obj->mm.quirked = true; 2563 } 2564 2565 GEM_BUG_ON(!sg_page_sizes); 2566 obj->mm.page_sizes.phys = sg_page_sizes; 2567 2568 /* 2569 * Calculate the supported page-sizes which fit into the given 2570 * sg_page_sizes. This will give us the page-sizes which we may be able 2571 * to use opportunistically when later inserting into the GTT. For 2572 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2573 * 64K or 4K pages, although in practice this will depend on a number of 2574 * other factors. 2575 */ 2576 obj->mm.page_sizes.sg = 0; 2577 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2578 if (obj->mm.page_sizes.phys & ~0u << i) 2579 obj->mm.page_sizes.sg |= BIT(i); 2580 } 2581 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2582 2583 spin_lock(&i915->mm.obj_lock); 2584 list_add(&obj->mm.link, &i915->mm.unbound_list); 2585 spin_unlock(&i915->mm.obj_lock); 2586 } 2587 2588 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2589 { 2590 int err; 2591 2592 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2593 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2594 return -EFAULT; 2595 } 2596 2597 err = obj->ops->get_pages(obj); 2598 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2599 2600 return err; 2601 } 2602 2603 /* Ensure that the associated pages are gathered from the backing storage 2604 * and pinned into our object. i915_gem_object_pin_pages() may be called 2605 * multiple times before they are released by a single call to 2606 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2607 * either as a result of memory pressure (reaping pages under the shrinker) 2608 * or as the object is itself released. 
2609 */ 2610 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2611 { 2612 int err; 2613 2614 err = mutex_lock_interruptible(&obj->mm.lock); 2615 if (err) 2616 return err; 2617 2618 if (unlikely(!i915_gem_object_has_pages(obj))) { 2619 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2620 2621 err = ____i915_gem_object_get_pages(obj); 2622 if (err) 2623 goto unlock; 2624 2625 smp_mb__before_atomic(); 2626 } 2627 atomic_inc(&obj->mm.pages_pin_count); 2628 2629 unlock: 2630 mutex_unlock(&obj->mm.lock); 2631 return err; 2632 } 2633 2634 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2635 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2636 enum i915_map_type type) 2637 { 2638 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2639 struct sg_table *sgt = obj->mm.pages; 2640 struct sgt_iter sgt_iter; 2641 struct page *page; 2642 struct page *stack_pages[32]; 2643 struct page **pages = stack_pages; 2644 unsigned long i = 0; 2645 pgprot_t pgprot; 2646 void *addr; 2647 2648 /* A single page can always be kmapped */ 2649 if (n_pages == 1 && type == I915_MAP_WB) 2650 return kmap(sg_page(sgt->sgl)); 2651 2652 if (n_pages > ARRAY_SIZE(stack_pages)) { 2653 /* Too big for stack -- allocate temporary array instead */ 2654 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2655 if (!pages) 2656 return NULL; 2657 } 2658 2659 for_each_sgt_page(page, sgt_iter, sgt) 2660 pages[i++] = page; 2661 2662 /* Check that we have the expected number of pages */ 2663 GEM_BUG_ON(i != n_pages); 2664 2665 switch (type) { 2666 default: 2667 MISSING_CASE(type); 2668 /* fallthrough to use PAGE_KERNEL anyway */ 2669 case I915_MAP_WB: 2670 pgprot = PAGE_KERNEL; 2671 break; 2672 case I915_MAP_WC: 2673 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2674 break; 2675 } 2676 addr = vmap(pages, n_pages, 0, pgprot); 2677 2678 if (pages != stack_pages) 2679 kvfree(pages); 2680 2681 return addr; 2682 } 2683 2684 /* get, pin, and map the pages of the object into kernel space */ 2685 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2686 enum i915_map_type type) 2687 { 2688 enum i915_map_type has_type; 2689 bool pinned; 2690 void *ptr; 2691 int ret; 2692 2693 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2694 return ERR_PTR(-ENXIO); 2695 2696 ret = mutex_lock_interruptible(&obj->mm.lock); 2697 if (ret) 2698 return ERR_PTR(ret); 2699 2700 pinned = !(type & I915_MAP_OVERRIDE); 2701 type &= ~I915_MAP_OVERRIDE; 2702 2703 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2704 if (unlikely(!i915_gem_object_has_pages(obj))) { 2705 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2706 2707 ret = ____i915_gem_object_get_pages(obj); 2708 if (ret) 2709 goto err_unlock; 2710 2711 smp_mb__before_atomic(); 2712 } 2713 atomic_inc(&obj->mm.pages_pin_count); 2714 pinned = false; 2715 } 2716 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2717 2718 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2719 if (ptr && has_type != type) { 2720 if (pinned) { 2721 ret = -EBUSY; 2722 goto err_unpin; 2723 } 2724 2725 if (is_vmalloc_addr(ptr)) 2726 vunmap(ptr); 2727 else 2728 kunmap(kmap_to_page(ptr)); 2729 2730 ptr = obj->mm.mapping = NULL; 2731 } 2732 2733 if (!ptr) { 2734 ptr = i915_gem_object_map(obj, type); 2735 if (!ptr) { 2736 ret = -ENOMEM; 2737 goto err_unpin; 2738 } 2739 2740 obj->mm.mapping = page_pack_bits(ptr, type); 2741 } 2742 2743 out_unlock: 2744 mutex_unlock(&obj->mm.lock); 2745 return ptr; 2746 2747 err_unpin: 2748 atomic_dec(&obj->mm.pages_pin_count); 2749 
err_unlock: 2750 ptr = ERR_PTR(ret); 2751 goto out_unlock; 2752 } 2753 2754 static int 2755 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2756 const struct drm_i915_gem_pwrite *arg) 2757 { 2758 struct address_space *mapping = obj->base.filp->f_mapping; 2759 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2760 u64 remain, offset; 2761 unsigned int pg; 2762 2763 /* Before we instantiate/pin the backing store for our use, we 2764 * can prepopulate the shmemfs filp efficiently using a write into 2765 * the pagecache. We avoid the penalty of instantiating all the 2766 * pages, important if the user is just writing to a few and never 2767 * uses the object on the GPU, and using a direct write into shmemfs 2768 * allows it to avoid the cost of retrieving a page (either swapin 2769 * or clearing-before-use) before it is overwritten. 2770 */ 2771 if (i915_gem_object_has_pages(obj)) 2772 return -ENODEV; 2773 2774 if (obj->mm.madv != I915_MADV_WILLNEED) 2775 return -EFAULT; 2776 2777 /* Before the pages are instantiated the object is treated as being 2778 * in the CPU domain. The pages will be clflushed as required before 2779 * use, and we can freely write into the pages directly. If userspace 2780 * races pwrite with any other operation; corruption will ensue - 2781 * that is userspace's prerogative! 2782 */ 2783 2784 remain = arg->size; 2785 offset = arg->offset; 2786 pg = offset_in_page(offset); 2787 2788 do { 2789 unsigned int len, unwritten; 2790 struct page *page; 2791 void *data, *vaddr; 2792 int err; 2793 2794 len = PAGE_SIZE - pg; 2795 if (len > remain) 2796 len = remain; 2797 2798 err = pagecache_write_begin(obj->base.filp, mapping, 2799 offset, len, 0, 2800 &page, &data); 2801 if (err < 0) 2802 return err; 2803 2804 vaddr = kmap(page); 2805 unwritten = copy_from_user(vaddr + pg, user_data, len); 2806 kunmap(page); 2807 2808 err = pagecache_write_end(obj->base.filp, mapping, 2809 offset, len, len - unwritten, 2810 page, data); 2811 if (err < 0) 2812 return err; 2813 2814 if (unwritten) 2815 return -EFAULT; 2816 2817 remain -= len; 2818 user_data += len; 2819 offset += len; 2820 pg = 0; 2821 } while (remain); 2822 2823 return 0; 2824 } 2825 2826 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2827 { 2828 bool banned; 2829 2830 atomic_inc(&ctx->guilty_count); 2831 2832 banned = false; 2833 if (i915_gem_context_is_bannable(ctx)) { 2834 unsigned int score; 2835 2836 score = atomic_add_return(CONTEXT_SCORE_GUILTY, 2837 &ctx->ban_score); 2838 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 2839 2840 DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", 2841 ctx->name, score, yesno(banned)); 2842 } 2843 if (!banned) 2844 return; 2845 2846 i915_gem_context_set_banned(ctx); 2847 if (!IS_ERR_OR_NULL(ctx->file_priv)) { 2848 atomic_inc(&ctx->file_priv->context_bans); 2849 DRM_DEBUG_DRIVER("client %s has had %d context banned\n", 2850 ctx->name, atomic_read(&ctx->file_priv->context_bans)); 2851 } 2852 } 2853 2854 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2855 { 2856 atomic_inc(&ctx->active_count); 2857 } 2858 2859 struct drm_i915_gem_request * 2860 i915_gem_find_active_request(struct intel_engine_cs *engine) 2861 { 2862 struct drm_i915_gem_request *request, *active = NULL; 2863 unsigned long flags; 2864 2865 /* We are called by the error capture and reset at a random 2866 * point in time. In particular, note that neither is crucially 2867 * ordered with an interrupt. 
After a hang, the GPU is dead and we 2868 * assume that no more writes can happen (we waited long enough for 2869 * all writes that were in transaction to be flushed) - adding an 2870 * extra delay for a recent interrupt is pointless. Hence, we do 2871 * not need an engine->irq_seqno_barrier() before the seqno reads. 2872 */ 2873 spin_lock_irqsave(&engine->timeline->lock, flags); 2874 list_for_each_entry(request, &engine->timeline->requests, link) { 2875 if (__i915_gem_request_completed(request, 2876 request->global_seqno)) 2877 continue; 2878 2879 GEM_BUG_ON(request->engine != engine); 2880 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 2881 &request->fence.flags)); 2882 2883 active = request; 2884 break; 2885 } 2886 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2887 2888 return active; 2889 } 2890 2891 static bool engine_stalled(struct intel_engine_cs *engine) 2892 { 2893 if (!engine->hangcheck.stalled) 2894 return false; 2895 2896 /* Check for possible seqno movement after hang declaration */ 2897 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { 2898 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); 2899 return false; 2900 } 2901 2902 return true; 2903 } 2904 2905 /* 2906 * Ensure irq handler finishes, and not run again. 2907 * Also return the active request so that we only search for it once. 2908 */ 2909 struct drm_i915_gem_request * 2910 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 2911 { 2912 struct drm_i915_gem_request *request = NULL; 2913 2914 /* 2915 * During the reset sequence, we must prevent the engine from 2916 * entering RC6. As the context state is undefined until we restart 2917 * the engine, if it does enter RC6 during the reset, the state 2918 * written to the powercontext is undefined and so we may lose 2919 * GPU state upon resume, i.e. fail to restart after a reset. 2920 */ 2921 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 2922 2923 /* 2924 * Prevent the signaler thread from updating the request 2925 * state (by calling dma_fence_signal) as we are processing 2926 * the reset. The write from the GPU of the seqno is 2927 * asynchronous and the signaler thread may see a different 2928 * value to us and declare the request complete, even though 2929 * the reset routine have picked that request as the active 2930 * (incomplete) request. This conflict is not handled 2931 * gracefully! 2932 */ 2933 kthread_park(engine->breadcrumbs.signaler); 2934 2935 /* 2936 * Prevent request submission to the hardware until we have 2937 * completed the reset in i915_gem_reset_finish(). If a request 2938 * is completed by one engine, it may then queue a request 2939 * to a second via its execlists->tasklet *just* as we are 2940 * calling engine->init_hw() and also writing the ELSP. 2941 * Turning off the execlists->tasklet until the reset is over 2942 * prevents the race. 2943 */ 2944 tasklet_kill(&engine->execlists.tasklet); 2945 tasklet_disable(&engine->execlists.tasklet); 2946 2947 /* 2948 * We're using worker to queue preemption requests from the tasklet in 2949 * GuC submission mode. 2950 * Even though tasklet was disabled, we may still have a worker queued. 2951 * Let's make sure that all workers scheduled before disabling the 2952 * tasklet are completed before continuing with the reset. 
2953 */ 2954 if (engine->i915->guc.preempt_wq) 2955 flush_workqueue(engine->i915->guc.preempt_wq); 2956 2957 if (engine->irq_seqno_barrier) 2958 engine->irq_seqno_barrier(engine); 2959 2960 request = i915_gem_find_active_request(engine); 2961 if (request && request->fence.error == -EIO) 2962 request = ERR_PTR(-EIO); /* Previous reset failed! */ 2963 2964 return request; 2965 } 2966 2967 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 2968 { 2969 struct intel_engine_cs *engine; 2970 struct drm_i915_gem_request *request; 2971 enum intel_engine_id id; 2972 int err = 0; 2973 2974 for_each_engine(engine, dev_priv, id) { 2975 request = i915_gem_reset_prepare_engine(engine); 2976 if (IS_ERR(request)) { 2977 err = PTR_ERR(request); 2978 continue; 2979 } 2980 2981 engine->hangcheck.active_request = request; 2982 } 2983 2984 i915_gem_revoke_fences(dev_priv); 2985 2986 return err; 2987 } 2988 2989 static void skip_request(struct drm_i915_gem_request *request) 2990 { 2991 void *vaddr = request->ring->vaddr; 2992 u32 head; 2993 2994 /* As this request likely depends on state from the lost 2995 * context, clear out all the user operations leaving the 2996 * breadcrumb at the end (so we get the fence notifications). 2997 */ 2998 head = request->head; 2999 if (request->postfix < head) { 3000 memset(vaddr + head, 0, request->ring->size - head); 3001 head = 0; 3002 } 3003 memset(vaddr + head, 0, request->postfix - head); 3004 3005 dma_fence_set_error(&request->fence, -EIO); 3006 } 3007 3008 static void engine_skip_context(struct drm_i915_gem_request *request) 3009 { 3010 struct intel_engine_cs *engine = request->engine; 3011 struct i915_gem_context *hung_ctx = request->ctx; 3012 struct intel_timeline *timeline; 3013 unsigned long flags; 3014 3015 timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); 3016 3017 spin_lock_irqsave(&engine->timeline->lock, flags); 3018 spin_lock(&timeline->lock); 3019 3020 list_for_each_entry_continue(request, &engine->timeline->requests, link) 3021 if (request->ctx == hung_ctx) 3022 skip_request(request); 3023 3024 list_for_each_entry(request, &timeline->requests, link) 3025 skip_request(request); 3026 3027 spin_unlock(&timeline->lock); 3028 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3029 } 3030 3031 /* Returns the request if it was guilty of the hang */ 3032 static struct drm_i915_gem_request * 3033 i915_gem_reset_request(struct intel_engine_cs *engine, 3034 struct drm_i915_gem_request *request) 3035 { 3036 /* The guilty request will get skipped on a hung engine. 3037 * 3038 * Users of client default contexts do not rely on logical 3039 * state preserved between batches so it is safe to execute 3040 * queued requests following the hang. Non default contexts 3041 * rely on preserved state, so skipping a batch loses the 3042 * evolution of the state and it needs to be considered corrupted. 3043 * Executing more queued batches on top of corrupted state is 3044 * risky. But we take the risk by trying to advance through 3045 * the queued requests in order to make the client behaviour 3046 * more predictable around resets, by not throwing away random 3047 * amount of batches it has prepared for execution. Sophisticated 3048 * clients can use gem_reset_stats_ioctl and dma fence status 3049 * (exported via sync_file info ioctl on explicit fences) to observe 3050 * when it loses the context state and should rebuild accordingly. 
3051 * 3052 * The context ban, and ultimately the client ban, mechanism are safety 3053 * valves if client submission ends up resulting in nothing more than 3054 * subsequent hangs. 3055 */ 3056 3057 if (engine_stalled(engine)) { 3058 i915_gem_context_mark_guilty(request->ctx); 3059 skip_request(request); 3060 3061 /* If this context is now banned, skip all pending requests. */ 3062 if (i915_gem_context_is_banned(request->ctx)) 3063 engine_skip_context(request); 3064 } else { 3065 /* 3066 * Since this is not the hung engine, it may have advanced 3067 * since the hang declaration. Double check by refinding 3068 * the active request at the time of the reset. 3069 */ 3070 request = i915_gem_find_active_request(engine); 3071 if (request) { 3072 i915_gem_context_mark_innocent(request->ctx); 3073 dma_fence_set_error(&request->fence, -EAGAIN); 3074 3075 /* Rewind the engine to replay the incomplete rq */ 3076 spin_lock_irq(&engine->timeline->lock); 3077 request = list_prev_entry(request, link); 3078 if (&request->link == &engine->timeline->requests) 3079 request = NULL; 3080 spin_unlock_irq(&engine->timeline->lock); 3081 } 3082 } 3083 3084 return request; 3085 } 3086 3087 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3088 struct drm_i915_gem_request *request) 3089 { 3090 /* 3091 * Make sure this write is visible before we re-enable the interrupt 3092 * handlers on another CPU, as tasklet_enable() resolves to just 3093 * a compiler barrier which is insufficient for our purpose here. 3094 */ 3095 smp_store_mb(engine->irq_posted, 0); 3096 3097 if (request) 3098 request = i915_gem_reset_request(engine, request); 3099 3100 if (request) { 3101 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3102 engine->name, request->global_seqno); 3103 } 3104 3105 /* Setup the CS to resume from the breadcrumb of the hung request */ 3106 engine->reset_hw(engine, request); 3107 } 3108 3109 void i915_gem_reset(struct drm_i915_private *dev_priv) 3110 { 3111 struct intel_engine_cs *engine; 3112 enum intel_engine_id id; 3113 3114 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3115 3116 i915_gem_retire_requests(dev_priv); 3117 3118 for_each_engine(engine, dev_priv, id) { 3119 struct i915_gem_context *ctx; 3120 3121 i915_gem_reset_engine(engine, engine->hangcheck.active_request); 3122 ctx = fetch_and_zero(&engine->last_retired_context); 3123 if (ctx) 3124 engine->context_unpin(engine, ctx); 3125 3126 /* 3127 * Ostensibily, we always want a context loaded for powersaving, 3128 * so if the engine is idle after the reset, send a request 3129 * to load our scratch kernel_context. 3130 * 3131 * More mysteriously, if we leave the engine idle after a reset, 3132 * the next userspace batch may hang, with what appears to be 3133 * an incoherent read by the CS (presumably stale TLB). An 3134 * empty request appears sufficient to paper over the glitch. 
3135 */ 3136 if (intel_engine_is_idle(engine)) { 3137 struct drm_i915_gem_request *rq; 3138 3139 rq = i915_gem_request_alloc(engine, 3140 dev_priv->kernel_context); 3141 if (!IS_ERR(rq)) 3142 __i915_add_request(rq, false); 3143 } 3144 } 3145 3146 i915_gem_restore_fences(dev_priv); 3147 3148 if (dev_priv->gt.awake) { 3149 intel_sanitize_gt_powersave(dev_priv); 3150 intel_enable_gt_powersave(dev_priv); 3151 if (INTEL_GEN(dev_priv) >= 6) 3152 gen6_rps_busy(dev_priv); 3153 } 3154 } 3155 3156 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3157 { 3158 tasklet_enable(&engine->execlists.tasklet); 3159 kthread_unpark(engine->breadcrumbs.signaler); 3160 3161 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3162 } 3163 3164 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3165 { 3166 struct intel_engine_cs *engine; 3167 enum intel_engine_id id; 3168 3169 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3170 3171 for_each_engine(engine, dev_priv, id) { 3172 engine->hangcheck.active_request = NULL; 3173 i915_gem_reset_finish_engine(engine); 3174 } 3175 } 3176 3177 static void nop_submit_request(struct drm_i915_gem_request *request) 3178 { 3179 dma_fence_set_error(&request->fence, -EIO); 3180 3181 i915_gem_request_submit(request); 3182 } 3183 3184 static void nop_complete_submit_request(struct drm_i915_gem_request *request) 3185 { 3186 unsigned long flags; 3187 3188 dma_fence_set_error(&request->fence, -EIO); 3189 3190 spin_lock_irqsave(&request->engine->timeline->lock, flags); 3191 __i915_gem_request_submit(request); 3192 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3193 spin_unlock_irqrestore(&request->engine->timeline->lock, flags); 3194 } 3195 3196 void i915_gem_set_wedged(struct drm_i915_private *i915) 3197 { 3198 struct intel_engine_cs *engine; 3199 enum intel_engine_id id; 3200 3201 if (drm_debug & DRM_UT_DRIVER) { 3202 struct drm_printer p = drm_debug_printer(__func__); 3203 3204 for_each_engine(engine, i915, id) 3205 intel_engine_dump(engine, &p, "%s\n", engine->name); 3206 } 3207 3208 /* 3209 * First, stop submission to hw, but do not yet complete requests by 3210 * rolling the global seqno forward (since this would complete requests 3211 * for which we haven't set the fence error to EIO yet). 3212 */ 3213 for_each_engine(engine, i915, id) 3214 engine->submit_request = nop_submit_request; 3215 3216 /* 3217 * Make sure no one is running the old callback before we proceed with 3218 * cancelling requests and resetting the completion tracking. Otherwise 3219 * we might submit a request to the hardware which never completes. 3220 */ 3221 synchronize_rcu(); 3222 3223 for_each_engine(engine, i915, id) { 3224 /* Mark all executing requests as skipped */ 3225 engine->cancel_requests(engine); 3226 3227 /* 3228 * Only once we've force-cancelled all in-flight requests can we 3229 * start to complete all requests. 3230 */ 3231 engine->submit_request = nop_complete_submit_request; 3232 } 3233 3234 /* 3235 * Make sure no request can slip through without getting completed by 3236 * either this call here to intel_engine_init_global_seqno, or the one 3237 * in nop_complete_submit_request. 3238 */ 3239 synchronize_rcu(); 3240 3241 for_each_engine(engine, i915, id) { 3242 unsigned long flags; 3243 3244 /* Mark all pending requests as complete so that any concurrent 3245 * (lockless) lookup doesn't try and wait upon the request as we 3246 * reset it. 
3247 */ 3248 spin_lock_irqsave(&engine->timeline->lock, flags); 3249 intel_engine_init_global_seqno(engine, 3250 intel_engine_last_submit(engine)); 3251 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3252 } 3253 3254 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3255 wake_up_all(&i915->gpu_error.reset_queue); 3256 } 3257 3258 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3259 { 3260 struct i915_gem_timeline *tl; 3261 int i; 3262 3263 lockdep_assert_held(&i915->drm.struct_mutex); 3264 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3265 return true; 3266 3267 /* Before unwedging, make sure that all pending operations 3268 * are flushed and errored out - we may have requests waiting upon 3269 * third party fences. We marked all inflight requests as EIO, and 3270 * every execbuf since returned EIO, for consistency we want all 3271 * the currently pending requests to also be marked as EIO, which 3272 * is done inside our nop_submit_request - and so we must wait. 3273 * 3274 * No more can be submitted until we reset the wedged bit. 3275 */ 3276 list_for_each_entry(tl, &i915->gt.timelines, link) { 3277 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3278 struct drm_i915_gem_request *rq; 3279 3280 rq = i915_gem_active_peek(&tl->engine[i].last_request, 3281 &i915->drm.struct_mutex); 3282 if (!rq) 3283 continue; 3284 3285 /* We can't use our normal waiter as we want to 3286 * avoid recursively trying to handle the current 3287 * reset. The basic dma_fence_default_wait() installs 3288 * a callback for dma_fence_signal(), which is 3289 * triggered by our nop handler (indirectly, the 3290 * callback enables the signaler thread which is 3291 * woken by the nop_submit_request() advancing the seqno 3292 * and when the seqno passes the fence, the signaler 3293 * then signals the fence waking us up). 3294 */ 3295 if (dma_fence_default_wait(&rq->fence, true, 3296 MAX_SCHEDULE_TIMEOUT) < 0) 3297 return false; 3298 } 3299 } 3300 3301 /* Undo nop_submit_request. We prevent all new i915 requests from 3302 * being queued (by disallowing execbuf whilst wedged) so having 3303 * waited for all active requests above, we know the system is idle 3304 * and do not have to worry about a thread being inside 3305 * engine->submit_request() as we swap over. So unlike installing 3306 * the nop_submit_request on reset, we can do this from normal 3307 * context and do not require stop_machine(). 3308 */ 3309 intel_engines_reset_default_submission(i915); 3310 i915_gem_contexts_lost(i915); 3311 3312 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3313 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3314 3315 return true; 3316 } 3317 3318 static void 3319 i915_gem_retire_work_handler(struct work_struct *work) 3320 { 3321 struct drm_i915_private *dev_priv = 3322 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3323 struct drm_device *dev = &dev_priv->drm; 3324 3325 /* Come back later if the device is busy... */ 3326 if (mutex_trylock(&dev->struct_mutex)) { 3327 i915_gem_retire_requests(dev_priv); 3328 mutex_unlock(&dev->struct_mutex); 3329 } 3330 3331 /* 3332 * Keep the retire handler running until we are finally idle. 3333 * We do not need to do this test under locking as in the worst-case 3334 * we queue the retire worker once too often. 
3335 */ 3336 if (READ_ONCE(dev_priv->gt.awake)) 3337 queue_delayed_work(dev_priv->wq, 3338 &dev_priv->gt.retire_work, 3339 round_jiffies_up_relative(HZ)); 3340 } 3341 3342 static void shrink_caches(struct drm_i915_private *i915) 3343 { 3344 /* 3345 * kmem_cache_shrink() discards empty slabs and reorders partially 3346 * filled slabs to prioritise allocating from the mostly full slabs, 3347 * with the aim of reducing fragmentation. 3348 */ 3349 kmem_cache_shrink(i915->priorities); 3350 kmem_cache_shrink(i915->dependencies); 3351 kmem_cache_shrink(i915->requests); 3352 kmem_cache_shrink(i915->luts); 3353 kmem_cache_shrink(i915->vmas); 3354 kmem_cache_shrink(i915->objects); 3355 } 3356 3357 struct sleep_rcu_work { 3358 union { 3359 struct rcu_head rcu; 3360 struct work_struct work; 3361 }; 3362 struct drm_i915_private *i915; 3363 unsigned int epoch; 3364 }; 3365 3366 static inline bool 3367 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3368 { 3369 /* 3370 * There is a small chance that the epoch wrapped since we started 3371 * sleeping. If we assume that epoch is at least a u32, then it will 3372 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3373 */ 3374 return epoch == READ_ONCE(i915->gt.epoch); 3375 } 3376 3377 static void __sleep_work(struct work_struct *work) 3378 { 3379 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3380 struct drm_i915_private *i915 = s->i915; 3381 unsigned int epoch = s->epoch; 3382 3383 kfree(s); 3384 if (same_epoch(i915, epoch)) 3385 shrink_caches(i915); 3386 } 3387 3388 static void __sleep_rcu(struct rcu_head *rcu) 3389 { 3390 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3391 struct drm_i915_private *i915 = s->i915; 3392 3393 if (same_epoch(i915, s->epoch)) { 3394 INIT_WORK(&s->work, __sleep_work); 3395 queue_work(i915->wq, &s->work); 3396 } else { 3397 kfree(s); 3398 } 3399 } 3400 3401 static inline bool 3402 new_requests_since_last_retire(const struct drm_i915_private *i915) 3403 { 3404 return (READ_ONCE(i915->gt.active_requests) || 3405 work_pending(&i915->gt.idle_work.work)); 3406 } 3407 3408 static void 3409 i915_gem_idle_work_handler(struct work_struct *work) 3410 { 3411 struct drm_i915_private *dev_priv = 3412 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3413 unsigned int epoch = I915_EPOCH_INVALID; 3414 bool rearm_hangcheck; 3415 ktime_t end; 3416 3417 if (!READ_ONCE(dev_priv->gt.awake)) 3418 return; 3419 3420 /* 3421 * Wait for last execlists context complete, but bail out in case a 3422 * new request is submitted. 3423 */ 3424 end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); 3425 do { 3426 if (new_requests_since_last_retire(dev_priv)) 3427 return; 3428 3429 if (intel_engines_are_idle(dev_priv)) 3430 break; 3431 3432 usleep_range(100, 500); 3433 } while (ktime_before(ktime_get(), end)); 3434 3435 rearm_hangcheck = 3436 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3437 3438 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3439 /* Currently busy, come back later */ 3440 mod_delayed_work(dev_priv->wq, 3441 &dev_priv->gt.idle_work, 3442 msecs_to_jiffies(50)); 3443 goto out_rearm; 3444 } 3445 3446 /* 3447 * New request retired after this work handler started, extend active 3448 * period until next instance of the work. 3449 */ 3450 if (new_requests_since_last_retire(dev_priv)) 3451 goto out_unlock; 3452 3453 /* 3454 * Be paranoid and flush a concurrent interrupt to make sure 3455 * we don't reactivate any irq tasklets after parking. 
3456 * 3457 * FIXME: Note that even though we have waited for execlists to be idle, 3458 * there may still be an in-flight interrupt even though the CSB 3459 * is now empty. synchronize_irq() makes sure that a residual interrupt 3460 * is completed before we continue, but it doesn't prevent the HW from 3461 * raising a spurious interrupt later. To complete the shield we should 3462 * coordinate disabling the CS irq with flushing the interrupts. 3463 */ 3464 synchronize_irq(dev_priv->drm.irq); 3465 3466 intel_engines_park(dev_priv); 3467 i915_gem_timelines_park(dev_priv); 3468 3469 i915_pmu_gt_parked(dev_priv); 3470 3471 GEM_BUG_ON(!dev_priv->gt.awake); 3472 dev_priv->gt.awake = false; 3473 epoch = dev_priv->gt.epoch; 3474 GEM_BUG_ON(epoch == I915_EPOCH_INVALID); 3475 rearm_hangcheck = false; 3476 3477 if (INTEL_GEN(dev_priv) >= 6) 3478 gen6_rps_idle(dev_priv); 3479 3480 intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); 3481 3482 intel_runtime_pm_put(dev_priv); 3483 out_unlock: 3484 mutex_unlock(&dev_priv->drm.struct_mutex); 3485 3486 out_rearm: 3487 if (rearm_hangcheck) { 3488 GEM_BUG_ON(!dev_priv->gt.awake); 3489 i915_queue_hangcheck(dev_priv); 3490 } 3491 3492 /* 3493 * When we are idle, it is an opportune time to reap our caches. 3494 * However, we have many objects that utilise RCU and the ordered 3495 * i915->wq that this work is executing on. To try and flush any 3496 * pending frees now we are idle, we first wait for an RCU grace 3497 * period, and then queue a task (that will run last on the wq) to 3498 * shrink and re-optimize the caches. 3499 */ 3500 if (same_epoch(dev_priv, epoch)) { 3501 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3502 if (s) { 3503 s->i915 = dev_priv; 3504 s->epoch = epoch; 3505 call_rcu(&s->rcu, __sleep_rcu); 3506 } 3507 } 3508 } 3509 3510 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3511 { 3512 struct drm_i915_private *i915 = to_i915(gem->dev); 3513 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3514 struct drm_i915_file_private *fpriv = file->driver_priv; 3515 struct i915_lut_handle *lut, *ln; 3516 3517 mutex_lock(&i915->drm.struct_mutex); 3518 3519 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3520 struct i915_gem_context *ctx = lut->ctx; 3521 struct i915_vma *vma; 3522 3523 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3524 if (ctx->file_priv != fpriv) 3525 continue; 3526 3527 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3528 GEM_BUG_ON(vma->obj != obj); 3529 3530 /* We allow the process to have multiple handles to the same 3531 * vma, in the same fd namespace, by virtue of flink/open. 3532 */ 3533 GEM_BUG_ON(!vma->open_count); 3534 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3535 i915_vma_close(vma); 3536 3537 list_del(&lut->obj_link); 3538 list_del(&lut->ctx_link); 3539 3540 kmem_cache_free(i915->luts, lut); 3541 __i915_gem_object_release_unless_active(obj); 3542 } 3543 3544 mutex_unlock(&i915->drm.struct_mutex); 3545 } 3546 3547 static unsigned long to_wait_timeout(s64 timeout_ns) 3548 { 3549 if (timeout_ns < 0) 3550 return MAX_SCHEDULE_TIMEOUT; 3551 3552 if (timeout_ns == 0) 3553 return 0; 3554 3555 return nsecs_to_jiffies_timeout(timeout_ns); 3556 } 3557 3558 /** 3559 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3560 * @dev: drm device pointer 3561 * @data: ioctl data blob 3562 * @file: drm file pointer 3563 * 3564 * Returns 0 if successful, else an error is returned with the remaining time in 3565 * the timeout parameter. 
-ETIME: object is still busy after timeout 3567 * -ERESTARTSYS: signal interrupted the wait 3568 * -ENOENT: object doesn't exist 3569 * Also possible, but rare: 3570 * -EAGAIN: incomplete, restart syscall 3571 * -ENOMEM: damn 3572 * -ENODEV: Internal IRQ fail 3573 * -E?: The add request failed 3574 * 3575 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3576 * non-zero timeout parameter the wait ioctl will wait for the given number of 3577 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3578 * without holding struct_mutex the object may become re-busied before this 3579 * function completes. A similar but shorter race condition exists in the busy 3580 * ioctl. 3581 */ 3582 int 3583 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3584 { 3585 struct drm_i915_gem_wait *args = data; 3586 struct drm_i915_gem_object *obj; 3587 ktime_t start; 3588 long ret; 3589 3590 if (args->flags != 0) 3591 return -EINVAL; 3592 3593 obj = i915_gem_object_lookup(file, args->bo_handle); 3594 if (!obj) 3595 return -ENOENT; 3596 3597 start = ktime_get(); 3598 3599 ret = i915_gem_object_wait(obj, 3600 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3601 to_wait_timeout(args->timeout_ns), 3602 to_rps_client(file)); 3603 3604 if (args->timeout_ns > 0) { 3605 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3606 if (args->timeout_ns < 0) 3607 args->timeout_ns = 0; 3608 3609 /* 3610 * Apparently ktime isn't accurate enough and occasionally has a 3611 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3612 * things up to make the test happy. We allow up to 1 jiffy. 3613 * 3614 * This is a regression from the timespec->ktime conversion. 3615 */ 3616 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3617 args->timeout_ns = 0; 3618 3619 /* Asked to wait beyond the jiffie/scheduler precision?
*/ 3620 if (ret == -ETIME && args->timeout_ns) 3621 ret = -EAGAIN; 3622 } 3623 3624 i915_gem_object_put(obj); 3625 return ret; 3626 } 3627 3628 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) 3629 { 3630 int ret, i; 3631 3632 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3633 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); 3634 if (ret) 3635 return ret; 3636 } 3637 3638 return 0; 3639 } 3640 3641 static int wait_for_engines(struct drm_i915_private *i915) 3642 { 3643 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3644 dev_err(i915->drm.dev, 3645 "Failed to idle engines, declaring wedged!\n"); 3646 if (drm_debug & DRM_UT_DRIVER) { 3647 struct drm_printer p = drm_debug_printer(__func__); 3648 struct intel_engine_cs *engine; 3649 enum intel_engine_id id; 3650 3651 for_each_engine(engine, i915, id) 3652 intel_engine_dump(engine, &p, 3653 "%s\n", engine->name); 3654 } 3655 3656 i915_gem_set_wedged(i915); 3657 return -EIO; 3658 } 3659 3660 return 0; 3661 } 3662 3663 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3664 { 3665 int ret; 3666 3667 /* If the device is asleep, we have no requests outstanding */ 3668 if (!READ_ONCE(i915->gt.awake)) 3669 return 0; 3670 3671 if (flags & I915_WAIT_LOCKED) { 3672 struct i915_gem_timeline *tl; 3673 3674 lockdep_assert_held(&i915->drm.struct_mutex); 3675 3676 list_for_each_entry(tl, &i915->gt.timelines, link) { 3677 ret = wait_for_timeline(tl, flags); 3678 if (ret) 3679 return ret; 3680 } 3681 i915_gem_retire_requests(i915); 3682 3683 ret = wait_for_engines(i915); 3684 } else { 3685 ret = wait_for_timeline(&i915->gt.global_timeline, flags); 3686 } 3687 3688 return ret; 3689 } 3690 3691 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3692 { 3693 /* 3694 * We manually flush the CPU domain so that we can override and 3695 * force the flush for the display, and perform it asyncrhonously. 3696 */ 3697 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3698 if (obj->cache_dirty) 3699 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3700 obj->base.write_domain = 0; 3701 } 3702 3703 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3704 { 3705 if (!READ_ONCE(obj->pin_global)) 3706 return; 3707 3708 mutex_lock(&obj->base.dev->struct_mutex); 3709 __i915_gem_object_flush_for_display(obj); 3710 mutex_unlock(&obj->base.dev->struct_mutex); 3711 } 3712 3713 /** 3714 * Moves a single object to the WC read, and possibly write domain. 3715 * @obj: object to act on 3716 * @write: ask for write access or read only 3717 * 3718 * This function returns when the move is complete, including waiting on 3719 * flushes to occur. 3720 */ 3721 int 3722 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3723 { 3724 int ret; 3725 3726 lockdep_assert_held(&obj->base.dev->struct_mutex); 3727 3728 ret = i915_gem_object_wait(obj, 3729 I915_WAIT_INTERRUPTIBLE | 3730 I915_WAIT_LOCKED | 3731 (write ? I915_WAIT_ALL : 0), 3732 MAX_SCHEDULE_TIMEOUT, 3733 NULL); 3734 if (ret) 3735 return ret; 3736 3737 if (obj->base.write_domain == I915_GEM_DOMAIN_WC) 3738 return 0; 3739 3740 /* Flush and acquire obj->pages so that we are coherent through 3741 * direct access in memory with previous cached writes through 3742 * shmemfs and that our cache domain tracking remains valid. 
3743 * For example, if the obj->filp was moved to swap without us 3744 * being notified and releasing the pages, we would mistakenly 3745 * continue to assume that the obj remained out of the CPU cached 3746 * domain. 3747 */ 3748 ret = i915_gem_object_pin_pages(obj); 3749 if (ret) 3750 return ret; 3751 3752 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3753 3754 /* Serialise direct access to this object with the barriers for 3755 * coherent writes from the GPU, by effectively invalidating the 3756 * WC domain upon first access. 3757 */ 3758 if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) 3759 mb(); 3760 3761 /* It should now be out of any other write domains, and we can update 3762 * the domain values for our changes. 3763 */ 3764 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3765 obj->base.read_domains |= I915_GEM_DOMAIN_WC; 3766 if (write) { 3767 obj->base.read_domains = I915_GEM_DOMAIN_WC; 3768 obj->base.write_domain = I915_GEM_DOMAIN_WC; 3769 obj->mm.dirty = true; 3770 } 3771 3772 i915_gem_object_unpin_pages(obj); 3773 return 0; 3774 } 3775 3776 /** 3777 * Moves a single object to the GTT read, and possibly write domain. 3778 * @obj: object to act on 3779 * @write: ask for write access or read only 3780 * 3781 * This function returns when the move is complete, including waiting on 3782 * flushes to occur. 3783 */ 3784 int 3785 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3786 { 3787 int ret; 3788 3789 lockdep_assert_held(&obj->base.dev->struct_mutex); 3790 3791 ret = i915_gem_object_wait(obj, 3792 I915_WAIT_INTERRUPTIBLE | 3793 I915_WAIT_LOCKED | 3794 (write ? I915_WAIT_ALL : 0), 3795 MAX_SCHEDULE_TIMEOUT, 3796 NULL); 3797 if (ret) 3798 return ret; 3799 3800 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3801 return 0; 3802 3803 /* Flush and acquire obj->pages so that we are coherent through 3804 * direct access in memory with previous cached writes through 3805 * shmemfs and that our cache domain tracking remains valid. 3806 * For example, if the obj->filp was moved to swap without us 3807 * being notified and releasing the pages, we would mistakenly 3808 * continue to assume that the obj remained out of the CPU cached 3809 * domain. 3810 */ 3811 ret = i915_gem_object_pin_pages(obj); 3812 if (ret) 3813 return ret; 3814 3815 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3816 3817 /* Serialise direct access to this object with the barriers for 3818 * coherent writes from the GPU, by effectively invalidating the 3819 * GTT domain upon first access. 3820 */ 3821 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3822 mb(); 3823 3824 /* It should now be out of any other write domains, and we can update 3825 * the domain values for our changes. 3826 */ 3827 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3828 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3829 if (write) { 3830 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3831 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3832 obj->mm.dirty = true; 3833 } 3834 3835 i915_gem_object_unpin_pages(obj); 3836 return 0; 3837 } 3838 3839 /** 3840 * Changes the cache-level of an object across all VMA. 3841 * @obj: object to act on 3842 * @cache_level: new cache level to set for the object 3843 * 3844 * After this function returns, the object will be in the new cache-level 3845 * across all GTT and the contents of the backing storage will be coherent, 3846 * with respect to the new cache-level. 
In order to keep the backing storage 3847 * coherent for all users, we only allow a single cache level to be set 3848 * globally on the object and prevent it from being changed whilst the 3849 * hardware is reading from the object. That is if the object is currently 3850 * on the scanout it will be set to uncached (or equivalent display 3851 * cache coherency) and all non-MOCS GPU access will also be uncached so 3852 * that all direct access to the scanout remains coherent. 3853 */ 3854 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3855 enum i915_cache_level cache_level) 3856 { 3857 struct i915_vma *vma; 3858 int ret; 3859 3860 lockdep_assert_held(&obj->base.dev->struct_mutex); 3861 3862 if (obj->cache_level == cache_level) 3863 return 0; 3864 3865 /* Inspect the list of currently bound VMA and unbind any that would 3866 * be invalid given the new cache-level. This is principally to 3867 * catch the issue of the CS prefetch crossing page boundaries and 3868 * reading an invalid PTE on older architectures. 3869 */ 3870 restart: 3871 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3872 if (!drm_mm_node_allocated(&vma->node)) 3873 continue; 3874 3875 if (i915_vma_is_pinned(vma)) { 3876 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3877 return -EBUSY; 3878 } 3879 3880 if (!i915_vma_is_closed(vma) && 3881 i915_gem_valid_gtt_space(vma, cache_level)) 3882 continue; 3883 3884 ret = i915_vma_unbind(vma); 3885 if (ret) 3886 return ret; 3887 3888 /* As unbinding may affect other elements in the 3889 * obj->vma_list (due to side-effects from retiring 3890 * an active vma), play safe and restart the iterator. 3891 */ 3892 goto restart; 3893 } 3894 3895 /* We can reuse the existing drm_mm nodes but need to change the 3896 * cache-level on the PTE. We could simply unbind them all and 3897 * rebind with the correct cache-level on next use. However since 3898 * we already have a valid slot, dma mapping, pages etc, we may as 3899 * rewrite the PTE in the belief that doing so tramples upon less 3900 * state and so involves less work. 3901 */ 3902 if (obj->bind_count) { 3903 /* Before we change the PTE, the GPU must not be accessing it. 3904 * If we wait upon the object, we know that all the bound 3905 * VMA are no longer active. 3906 */ 3907 ret = i915_gem_object_wait(obj, 3908 I915_WAIT_INTERRUPTIBLE | 3909 I915_WAIT_LOCKED | 3910 I915_WAIT_ALL, 3911 MAX_SCHEDULE_TIMEOUT, 3912 NULL); 3913 if (ret) 3914 return ret; 3915 3916 if (!HAS_LLC(to_i915(obj->base.dev)) && 3917 cache_level != I915_CACHE_NONE) { 3918 /* Access to snoopable pages through the GTT is 3919 * incoherent and on some machines causes a hard 3920 * lockup. Relinquish the CPU mmaping to force 3921 * userspace to refault in the pages and we can 3922 * then double check if the GTT mapping is still 3923 * valid for that pointer access. 3924 */ 3925 i915_gem_release_mmap(obj); 3926 3927 /* As we no longer need a fence for GTT access, 3928 * we can relinquish it now (and so prevent having 3929 * to steal a fence from someone else on the next 3930 * fence request). Note GPU activity would have 3931 * dropped the fence as all snoopable access is 3932 * supposed to be linear. 3933 */ 3934 for_each_ggtt_vma(vma, obj) { 3935 ret = i915_vma_put_fence(vma); 3936 if (ret) 3937 return ret; 3938 } 3939 } else { 3940 /* We either have incoherent backing store and 3941 * so no GTT access or the architecture is fully 3942 * coherent. 
In such cases, existing GTT mmaps 3943 * ignore the cache bit in the PTE and we can 3944 * rewrite it without confusing the GPU or having 3945 * to force userspace to fault back in its mmaps. 3946 */ 3947 } 3948 3949 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3950 if (!drm_mm_node_allocated(&vma->node)) 3951 continue; 3952 3953 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3954 if (ret) 3955 return ret; 3956 } 3957 } 3958 3959 list_for_each_entry(vma, &obj->vma_list, obj_link) 3960 vma->node.color = cache_level; 3961 i915_gem_object_set_cache_coherency(obj, cache_level); 3962 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3963 3964 return 0; 3965 } 3966 3967 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3968 struct drm_file *file) 3969 { 3970 struct drm_i915_gem_caching *args = data; 3971 struct drm_i915_gem_object *obj; 3972 int err = 0; 3973 3974 rcu_read_lock(); 3975 obj = i915_gem_object_lookup_rcu(file, args->handle); 3976 if (!obj) { 3977 err = -ENOENT; 3978 goto out; 3979 } 3980 3981 switch (obj->cache_level) { 3982 case I915_CACHE_LLC: 3983 case I915_CACHE_L3_LLC: 3984 args->caching = I915_CACHING_CACHED; 3985 break; 3986 3987 case I915_CACHE_WT: 3988 args->caching = I915_CACHING_DISPLAY; 3989 break; 3990 3991 default: 3992 args->caching = I915_CACHING_NONE; 3993 break; 3994 } 3995 out: 3996 rcu_read_unlock(); 3997 return err; 3998 } 3999 4000 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4001 struct drm_file *file) 4002 { 4003 struct drm_i915_private *i915 = to_i915(dev); 4004 struct drm_i915_gem_caching *args = data; 4005 struct drm_i915_gem_object *obj; 4006 enum i915_cache_level level; 4007 int ret = 0; 4008 4009 switch (args->caching) { 4010 case I915_CACHING_NONE: 4011 level = I915_CACHE_NONE; 4012 break; 4013 case I915_CACHING_CACHED: 4014 /* 4015 * Due to a HW issue on BXT A stepping, GPU stores via a 4016 * snooped mapping may leave stale data in a corresponding CPU 4017 * cacheline, whereas normally such cachelines would get 4018 * invalidated. 4019 */ 4020 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4021 return -ENODEV; 4022 4023 level = I915_CACHE_LLC; 4024 break; 4025 case I915_CACHING_DISPLAY: 4026 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4027 break; 4028 default: 4029 return -EINVAL; 4030 } 4031 4032 obj = i915_gem_object_lookup(file, args->handle); 4033 if (!obj) 4034 return -ENOENT; 4035 4036 /* 4037 * The caching mode of proxy object is handled by its generator, and 4038 * not allowed to be changed by userspace. 4039 */ 4040 if (i915_gem_object_is_proxy(obj)) { 4041 ret = -ENXIO; 4042 goto out; 4043 } 4044 4045 if (obj->cache_level == level) 4046 goto out; 4047 4048 ret = i915_gem_object_wait(obj, 4049 I915_WAIT_INTERRUPTIBLE, 4050 MAX_SCHEDULE_TIMEOUT, 4051 to_rps_client(file)); 4052 if (ret) 4053 goto out; 4054 4055 ret = i915_mutex_lock_interruptible(dev); 4056 if (ret) 4057 goto out; 4058 4059 ret = i915_gem_object_set_cache_level(obj, level); 4060 mutex_unlock(&dev->struct_mutex); 4061 4062 out: 4063 i915_gem_object_put(obj); 4064 return ret; 4065 } 4066 4067 /* 4068 * Prepare buffer for display plane (scanout, cursors, etc). 4069 * Can be called from an uninterruptible phase (modesetting) and allows 4070 * any flushes to be pipelined (for pageflips). 
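 *
 * As a rough usage sketch (illustrative only, error handling trimmed),
 * the display code is expected to bracket a scanout roughly like:
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, align, view);
 *	if (!IS_ERR(vma)) {
 *		... scan out from i915_ggtt_offset(vma) ...
 *		i915_gem_object_unpin_from_display_plane(vma);
 *	}
 *
 * Both calls expect the caller to hold struct_mutex.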
4071 */ 4072 struct i915_vma * 4073 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4074 u32 alignment, 4075 const struct i915_ggtt_view *view) 4076 { 4077 struct i915_vma *vma; 4078 int ret; 4079 4080 lockdep_assert_held(&obj->base.dev->struct_mutex); 4081 4082 /* Mark the global pin early so that we account for the 4083 * display coherency whilst setting up the cache domains. 4084 */ 4085 obj->pin_global++; 4086 4087 /* The display engine is not coherent with the LLC cache on gen6. As 4088 * a result, we make sure that the pinning that is about to occur is 4089 * done with uncached PTEs. This is lowest common denominator for all 4090 * chipsets. 4091 * 4092 * However for gen6+, we could do better by using the GFDT bit instead 4093 * of uncaching, which would allow us to flush all the LLC-cached data 4094 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4095 */ 4096 ret = i915_gem_object_set_cache_level(obj, 4097 HAS_WT(to_i915(obj->base.dev)) ? 4098 I915_CACHE_WT : I915_CACHE_NONE); 4099 if (ret) { 4100 vma = ERR_PTR(ret); 4101 goto err_unpin_global; 4102 } 4103 4104 /* As the user may map the buffer once pinned in the display plane 4105 * (e.g. libkms for the bootup splash), we have to ensure that we 4106 * always use map_and_fenceable for all scanout buffers. However, 4107 * it may simply be too big to fit into mappable, in which case 4108 * put it anyway and hope that userspace can cope (but always first 4109 * try to preserve the existing ABI). 4110 */ 4111 vma = ERR_PTR(-ENOSPC); 4112 if (!view || view->type == I915_GGTT_VIEW_NORMAL) 4113 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4114 PIN_MAPPABLE | PIN_NONBLOCK); 4115 if (IS_ERR(vma)) { 4116 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4117 unsigned int flags; 4118 4119 /* Valleyview is definitely limited to scanning out the first 4120 * 512MiB. Lets presume this behaviour was inherited from the 4121 * g4x display engine and that all earlier gen are similarly 4122 * limited. Testing suggests that it is a little more 4123 * complicated than this. For example, Cherryview appears quite 4124 * happy to scanout from anywhere within its global aperture. 4125 */ 4126 flags = 0; 4127 if (HAS_GMCH_DISPLAY(i915)) 4128 flags = PIN_MAPPABLE; 4129 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4130 } 4131 if (IS_ERR(vma)) 4132 goto err_unpin_global; 4133 4134 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4135 4136 /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ 4137 __i915_gem_object_flush_for_display(obj); 4138 intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); 4139 4140 /* It should now be out of any other write domains, and we can update 4141 * the domain values for our changes. 4142 */ 4143 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4144 4145 return vma; 4146 4147 err_unpin_global: 4148 obj->pin_global--; 4149 return vma; 4150 } 4151 4152 void 4153 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4154 { 4155 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4156 4157 if (WARN_ON(vma->obj->pin_global == 0)) 4158 return; 4159 4160 if (--vma->obj->pin_global == 0) 4161 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4162 4163 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4164 i915_gem_object_bump_inactive_ggtt(vma->obj); 4165 4166 i915_vma_unpin(vma); 4167 } 4168 4169 /** 4170 * Moves a single object to the CPU read, and possibly write domain. 
4171 * @obj: object to act on 4172 * @write: requesting write or read-only access 4173 * 4174 * This function returns when the move is complete, including waiting on 4175 * flushes to occur. 4176 */ 4177 int 4178 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4179 { 4180 int ret; 4181 4182 lockdep_assert_held(&obj->base.dev->struct_mutex); 4183 4184 ret = i915_gem_object_wait(obj, 4185 I915_WAIT_INTERRUPTIBLE | 4186 I915_WAIT_LOCKED | 4187 (write ? I915_WAIT_ALL : 0), 4188 MAX_SCHEDULE_TIMEOUT, 4189 NULL); 4190 if (ret) 4191 return ret; 4192 4193 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4194 4195 /* Flush the CPU cache if it's still invalid. */ 4196 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4197 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4198 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4199 } 4200 4201 /* It should now be out of any other write domains, and we can update 4202 * the domain values for our changes. 4203 */ 4204 GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 4205 4206 /* If we're writing through the CPU, then the GPU read domains will 4207 * need to be invalidated at next use. 4208 */ 4209 if (write) 4210 __start_cpu_write(obj); 4211 4212 return 0; 4213 } 4214 4215 /* Throttle our rendering by waiting until the ring has completed our requests 4216 * emitted over 20 msec ago. 4217 * 4218 * Note that if we were to use the current jiffies each time around the loop, 4219 * we wouldn't escape the function with any frames outstanding if the time to 4220 * render a frame was over 20ms. 4221 * 4222 * This should get us reasonable parallelism between CPU and GPU but also 4223 * relatively low latency when blocking on a particular request to finish. 4224 */ 4225 static int 4226 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4227 { 4228 struct drm_i915_private *dev_priv = to_i915(dev); 4229 struct drm_i915_file_private *file_priv = file->driver_priv; 4230 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4231 struct drm_i915_gem_request *request, *target = NULL; 4232 long ret; 4233 4234 /* ABI: return -EIO if already wedged */ 4235 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4236 return -EIO; 4237 4238 spin_lock(&file_priv->mm.lock); 4239 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4240 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4241 break; 4242 4243 if (target) { 4244 list_del(&target->client_link); 4245 target->file_priv = NULL; 4246 } 4247 4248 target = request; 4249 } 4250 if (target) 4251 i915_gem_request_get(target); 4252 spin_unlock(&file_priv->mm.lock); 4253 4254 if (target == NULL) 4255 return 0; 4256 4257 ret = i915_wait_request(target, 4258 I915_WAIT_INTERRUPTIBLE, 4259 MAX_SCHEDULE_TIMEOUT); 4260 i915_gem_request_put(target); 4261 4262 return ret < 0 ? ret : 0; 4263 } 4264 4265 struct i915_vma * 4266 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4267 const struct i915_ggtt_view *view, 4268 u64 size, 4269 u64 alignment, 4270 u64 flags) 4271 { 4272 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4273 struct i915_address_space *vm = &dev_priv->ggtt.base; 4274 struct i915_vma *vma; 4275 int ret; 4276 4277 lockdep_assert_held(&obj->base.dev->struct_mutex); 4278 4279 if (!view && flags & PIN_MAPPABLE) { 4280 /* If the required space is larger than the available 4281 * aperture, we will not able to find a slot for the 4282 * object and unbinding the object now will be in 4283 * vain. 
Worse, doing so may cause us to ping-pong 4284 * the object in and out of the Global GTT and 4285 * waste a lot of cycles under the mutex. 4286 */ 4287 if (obj->base.size > dev_priv->ggtt.mappable_end) 4288 return ERR_PTR(-E2BIG); 4289 4290 /* If NONBLOCK is set the caller is optimistically 4291 * trying to cache the full object within the mappable 4292 * aperture, and *must* have a fallback in place for 4293 * situations where we cannot bind the object. We 4294 * can be a little more lax here and use the fallback 4295 * more often to avoid costly migrations of ourselves 4296 * and other objects within the aperture. 4297 * 4298 * Half-the-aperture is used as a simple heuristic. 4299 * More interesting would to do search for a free 4300 * block prior to making the commitment to unbind. 4301 * That caters for the self-harm case, and with a 4302 * little more heuristics (e.g. NOFAULT, NOEVICT) 4303 * we could try to minimise harm to others. 4304 */ 4305 if (flags & PIN_NONBLOCK && 4306 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4307 return ERR_PTR(-ENOSPC); 4308 } 4309 4310 vma = i915_vma_instance(obj, vm, view); 4311 if (unlikely(IS_ERR(vma))) 4312 return vma; 4313 4314 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4315 if (flags & PIN_NONBLOCK) { 4316 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4317 return ERR_PTR(-ENOSPC); 4318 4319 if (flags & PIN_MAPPABLE && 4320 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4321 return ERR_PTR(-ENOSPC); 4322 } 4323 4324 WARN(i915_vma_is_pinned(vma), 4325 "bo is already pinned in ggtt with incorrect alignment:" 4326 " offset=%08x, req.alignment=%llx," 4327 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4328 i915_ggtt_offset(vma), alignment, 4329 !!(flags & PIN_MAPPABLE), 4330 i915_vma_is_map_and_fenceable(vma)); 4331 ret = i915_vma_unbind(vma); 4332 if (ret) 4333 return ERR_PTR(ret); 4334 } 4335 4336 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4337 if (ret) 4338 return ERR_PTR(ret); 4339 4340 return vma; 4341 } 4342 4343 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4344 { 4345 /* Note that we could alias engines in the execbuf API, but 4346 * that would be very unwise as it prevents userspace from 4347 * fine control over engine selection. Ahem. 4348 * 4349 * This should be something like EXEC_MAX_ENGINE instead of 4350 * I915_NUM_ENGINES. 4351 */ 4352 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4353 return 0x10000 << id; 4354 } 4355 4356 static __always_inline unsigned int __busy_write_id(unsigned int id) 4357 { 4358 /* The uABI guarantees an active writer is also amongst the read 4359 * engines. This would be true if we accessed the activity tracking 4360 * under the lock, but as we perform the lookup of the object and 4361 * its activity locklessly we can not guarantee that the last_write 4362 * being active implies that we have set the same engine flag from 4363 * last_read - hence we always set both read and write busy for 4364 * last_write. 4365 */ 4366 return id | __busy_read_flag(id); 4367 } 4368 4369 static __always_inline unsigned int 4370 __busy_set_if_active(const struct dma_fence *fence, 4371 unsigned int (*flag)(unsigned int id)) 4372 { 4373 struct drm_i915_gem_request *rq; 4374 4375 /* We have to check the current hw status of the fence as the uABI 4376 * guarantees forward progress. We could rely on the idle worker 4377 * to eventually flush us, but to minimise latency just ask the 4378 * hardware. 
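 * (That is the purpose of the i915_gem_request_completed() check
 * below: it queries the engine's reported progress directly rather
 * than waiting for the signaling machinery to catch up.)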
4379 * 4380 * Note we only report on the status of native fences. 4381 */ 4382 if (!dma_fence_is_i915(fence)) 4383 return 0; 4384 4385 /* opencode to_request() in order to avoid const warnings */ 4386 rq = container_of(fence, struct drm_i915_gem_request, fence); 4387 if (i915_gem_request_completed(rq)) 4388 return 0; 4389 4390 return flag(rq->engine->uabi_id); 4391 } 4392 4393 static __always_inline unsigned int 4394 busy_check_reader(const struct dma_fence *fence) 4395 { 4396 return __busy_set_if_active(fence, __busy_read_flag); 4397 } 4398 4399 static __always_inline unsigned int 4400 busy_check_writer(const struct dma_fence *fence) 4401 { 4402 if (!fence) 4403 return 0; 4404 4405 return __busy_set_if_active(fence, __busy_write_id); 4406 } 4407 4408 int 4409 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4410 struct drm_file *file) 4411 { 4412 struct drm_i915_gem_busy *args = data; 4413 struct drm_i915_gem_object *obj; 4414 struct reservation_object_list *list; 4415 unsigned int seq; 4416 int err; 4417 4418 err = -ENOENT; 4419 rcu_read_lock(); 4420 obj = i915_gem_object_lookup_rcu(file, args->handle); 4421 if (!obj) 4422 goto out; 4423 4424 /* A discrepancy here is that we do not report the status of 4425 * non-i915 fences, i.e. even though we may report the object as idle, 4426 * a call to set-domain may still stall waiting for foreign rendering. 4427 * This also means that wait-ioctl may report an object as busy, 4428 * where busy-ioctl considers it idle. 4429 * 4430 * We trade the ability to warn of foreign fences to report on which 4431 * i915 engines are active for the object. 4432 * 4433 * Alternatively, we can trade that extra information on read/write 4434 * activity with 4435 * args->busy = 4436 * !reservation_object_test_signaled_rcu(obj->resv, true); 4437 * to report the overall busyness. This is what the wait-ioctl does. 
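 *
 * For illustration only (not taken from the uAPI headers), a userspace
 * caller could unpack the result along the lines of:
 *
 *	__u32 busy = args.busy;		/* from DRM_IOCTL_I915_GEM_BUSY */
 *	__u32 writer = busy & 0xffff;	/* uabi id of the last writer */
 *	__u32 readers = busy >> 16;	/* bitmask of engines still reading */
 *
 * mirroring __busy_read_flag() and __busy_write_id() above.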
4438 * 4439 */ 4440 retry: 4441 seq = raw_read_seqcount(&obj->resv->seq); 4442 4443 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4444 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4445 4446 /* Translate shared fences to READ set of engines */ 4447 list = rcu_dereference(obj->resv->fence); 4448 if (list) { 4449 unsigned int shared_count = list->shared_count, i; 4450 4451 for (i = 0; i < shared_count; ++i) { 4452 struct dma_fence *fence = 4453 rcu_dereference(list->shared[i]); 4454 4455 args->busy |= busy_check_reader(fence); 4456 } 4457 } 4458 4459 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4460 goto retry; 4461 4462 err = 0; 4463 out: 4464 rcu_read_unlock(); 4465 return err; 4466 } 4467 4468 int 4469 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4470 struct drm_file *file_priv) 4471 { 4472 return i915_gem_ring_throttle(dev, file_priv); 4473 } 4474 4475 int 4476 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4477 struct drm_file *file_priv) 4478 { 4479 struct drm_i915_private *dev_priv = to_i915(dev); 4480 struct drm_i915_gem_madvise *args = data; 4481 struct drm_i915_gem_object *obj; 4482 int err; 4483 4484 switch (args->madv) { 4485 case I915_MADV_DONTNEED: 4486 case I915_MADV_WILLNEED: 4487 break; 4488 default: 4489 return -EINVAL; 4490 } 4491 4492 obj = i915_gem_object_lookup(file_priv, args->handle); 4493 if (!obj) 4494 return -ENOENT; 4495 4496 err = mutex_lock_interruptible(&obj->mm.lock); 4497 if (err) 4498 goto out; 4499 4500 if (i915_gem_object_has_pages(obj) && 4501 i915_gem_object_is_tiled(obj) && 4502 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4503 if (obj->mm.madv == I915_MADV_WILLNEED) { 4504 GEM_BUG_ON(!obj->mm.quirked); 4505 __i915_gem_object_unpin_pages(obj); 4506 obj->mm.quirked = false; 4507 } 4508 if (args->madv == I915_MADV_WILLNEED) { 4509 GEM_BUG_ON(obj->mm.quirked); 4510 __i915_gem_object_pin_pages(obj); 4511 obj->mm.quirked = true; 4512 } 4513 } 4514 4515 if (obj->mm.madv != __I915_MADV_PURGED) 4516 obj->mm.madv = args->madv; 4517 4518 /* if the object is no longer attached, discard its backing storage */ 4519 if (obj->mm.madv == I915_MADV_DONTNEED && 4520 !i915_gem_object_has_pages(obj)) 4521 i915_gem_object_truncate(obj); 4522 4523 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4524 mutex_unlock(&obj->mm.lock); 4525 4526 out: 4527 i915_gem_object_put(obj); 4528 return err; 4529 } 4530 4531 static void 4532 frontbuffer_retire(struct i915_gem_active *active, 4533 struct drm_i915_gem_request *request) 4534 { 4535 struct drm_i915_gem_object *obj = 4536 container_of(active, typeof(*obj), frontbuffer_write); 4537 4538 intel_fb_obj_flush(obj, ORIGIN_CS); 4539 } 4540 4541 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4542 const struct drm_i915_gem_object_ops *ops) 4543 { 4544 mutex_init(&obj->mm.lock); 4545 4546 INIT_LIST_HEAD(&obj->vma_list); 4547 INIT_LIST_HEAD(&obj->lut_list); 4548 INIT_LIST_HEAD(&obj->batch_pool_link); 4549 4550 obj->ops = ops; 4551 4552 reservation_object_init(&obj->__builtin_resv); 4553 obj->resv = &obj->__builtin_resv; 4554 4555 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4556 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4557 4558 obj->mm.madv = I915_MADV_WILLNEED; 4559 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4560 mutex_init(&obj->mm.get_page.lock); 4561 4562 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4563 } 4564 4565 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4566 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4567 I915_GEM_OBJECT_IS_SHRINKABLE, 4568 4569 .get_pages = i915_gem_object_get_pages_gtt, 4570 .put_pages = i915_gem_object_put_pages_gtt, 4571 4572 .pwrite = i915_gem_object_pwrite_gtt, 4573 }; 4574 4575 static int i915_gem_object_create_shmem(struct drm_device *dev, 4576 struct drm_gem_object *obj, 4577 size_t size) 4578 { 4579 struct drm_i915_private *i915 = to_i915(dev); 4580 unsigned long flags = VM_NORESERVE; 4581 struct file *filp; 4582 4583 drm_gem_private_object_init(dev, obj, size); 4584 4585 if (i915->mm.gemfs) 4586 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4587 flags); 4588 else 4589 filp = shmem_file_setup("i915", size, flags); 4590 4591 if (IS_ERR(filp)) 4592 return PTR_ERR(filp); 4593 4594 obj->filp = filp; 4595 4596 return 0; 4597 } 4598 4599 struct drm_i915_gem_object * 4600 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4601 { 4602 struct drm_i915_gem_object *obj; 4603 struct address_space *mapping; 4604 unsigned int cache_level; 4605 gfp_t mask; 4606 int ret; 4607 4608 /* There is a prevalence of the assumption that we fit the object's 4609 * page count inside a 32bit _signed_ variable. Let's document this and 4610 * catch if we ever need to fix it. In the meantime, if you do spot 4611 * such a local variable, please consider fixing! 4612 */ 4613 if (size >> PAGE_SHIFT > INT_MAX) 4614 return ERR_PTR(-E2BIG); 4615 4616 if (overflows_type(size, obj->base.size)) 4617 return ERR_PTR(-E2BIG); 4618 4619 obj = i915_gem_object_alloc(dev_priv); 4620 if (obj == NULL) 4621 return ERR_PTR(-ENOMEM); 4622 4623 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4624 if (ret) 4625 goto fail; 4626 4627 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4628 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4629 /* 965gm cannot relocate objects above 4GiB. */ 4630 mask &= ~__GFP_HIGHMEM; 4631 mask |= __GFP_DMA32; 4632 } 4633 4634 mapping = obj->base.filp->f_mapping; 4635 mapping_set_gfp_mask(mapping, mask); 4636 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4637 4638 i915_gem_object_init(obj, &i915_gem_object_ops); 4639 4640 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4641 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4642 4643 if (HAS_LLC(dev_priv)) 4644 /* On some devices, we can have the GPU use the LLC (the CPU 4645 * cache) for about a 10% performance improvement 4646 * compared to uncached. Graphics requests other than 4647 * display scanout are coherent with the CPU in 4648 * accessing this cache. This means in this mode we 4649 * don't need to clflush on the CPU side, and on the 4650 * GPU side we only need to flush internal caches to 4651 * get data visible to the CPU. 4652 * 4653 * However, we maintain the display planes as UC, and so 4654 * need to rebind when first used as such. 4655 */ 4656 cache_level = I915_CACHE_LLC; 4657 else 4658 cache_level = I915_CACHE_NONE; 4659 4660 i915_gem_object_set_cache_coherency(obj, cache_level); 4661 4662 trace_i915_gem_object_create(obj); 4663 4664 return obj; 4665 4666 fail: 4667 i915_gem_object_free(obj); 4668 return ERR_PTR(ret); 4669 } 4670 4671 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4672 { 4673 /* If we are the last user of the backing storage (be it shmemfs 4674 * pages or stolen etc), we know that the pages are going to be 4675 * immediately released. In this case, we can then skip copying 4676 * back the contents from the GPU. 
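 *
 * The checks below approximate "last user": the object must still be
 * marked WILLNEED, and, if it is shmemfs backed (obj->base.filp), ours
 * must be the only remaining reference on that file.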
4677 */ 4678 4679 if (obj->mm.madv != I915_MADV_WILLNEED) 4680 return false; 4681 4682 if (obj->base.filp == NULL) 4683 return true; 4684 4685 /* At first glance, this looks racy, but then again so would be 4686 * userspace racing mmap against close. However, the first external 4687 * reference to the filp can only be obtained through the 4688 * i915_gem_mmap_ioctl() which safeguards us against the user 4689 * acquiring such a reference whilst we are in the middle of 4690 * freeing the object. 4691 */ 4692 return atomic_long_read(&obj->base.filp->f_count) == 1; 4693 } 4694 4695 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4696 struct llist_node *freed) 4697 { 4698 struct drm_i915_gem_object *obj, *on; 4699 4700 intel_runtime_pm_get(i915); 4701 llist_for_each_entry_safe(obj, on, freed, freed) { 4702 struct i915_vma *vma, *vn; 4703 4704 trace_i915_gem_object_destroy(obj); 4705 4706 mutex_lock(&i915->drm.struct_mutex); 4707 4708 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4709 list_for_each_entry_safe(vma, vn, 4710 &obj->vma_list, obj_link) { 4711 GEM_BUG_ON(i915_vma_is_active(vma)); 4712 vma->flags &= ~I915_VMA_PIN_MASK; 4713 i915_vma_close(vma); 4714 } 4715 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4716 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4717 4718 /* This serializes freeing with the shrinker. Since the free 4719 * is delayed, first by RCU then by the workqueue, we want the 4720 * shrinker to be able to free pages of unreferenced objects, 4721 * or else we may oom whilst there are plenty of deferred 4722 * freed objects. 4723 */ 4724 if (i915_gem_object_has_pages(obj)) { 4725 spin_lock(&i915->mm.obj_lock); 4726 list_del_init(&obj->mm.link); 4727 spin_unlock(&i915->mm.obj_lock); 4728 } 4729 4730 mutex_unlock(&i915->drm.struct_mutex); 4731 4732 GEM_BUG_ON(obj->bind_count); 4733 GEM_BUG_ON(obj->userfault_count); 4734 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4735 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4736 4737 if (obj->ops->release) 4738 obj->ops->release(obj); 4739 4740 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4741 atomic_set(&obj->mm.pages_pin_count, 0); 4742 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4743 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4744 4745 if (obj->base.import_attach) 4746 drm_prime_gem_destroy(&obj->base, NULL); 4747 4748 reservation_object_fini(&obj->__builtin_resv); 4749 drm_gem_object_release(&obj->base); 4750 i915_gem_info_remove_obj(i915, obj->base.size); 4751 4752 kfree(obj->bit_17); 4753 i915_gem_object_free(obj); 4754 4755 if (on) 4756 cond_resched(); 4757 } 4758 intel_runtime_pm_put(i915); 4759 } 4760 4761 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4762 { 4763 struct llist_node *freed; 4764 4765 /* Free the oldest, most stale object to keep the free_list short */ 4766 freed = NULL; 4767 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4768 /* Only one consumer of llist_del_first() allowed */ 4769 spin_lock(&i915->mm.free_lock); 4770 freed = llist_del_first(&i915->mm.free_list); 4771 spin_unlock(&i915->mm.free_lock); 4772 } 4773 if (unlikely(freed)) { 4774 freed->next = NULL; 4775 __i915_gem_free_objects(i915, freed); 4776 } 4777 } 4778 4779 static void __i915_gem_free_work(struct work_struct *work) 4780 { 4781 struct drm_i915_private *i915 = 4782 container_of(work, struct drm_i915_private, mm.free_work); 4783 struct llist_node *freed; 4784 4785 /* 4786 * All file-owned VMA should have been released by this point through 4787 * i915_gem_close_object(), or 
earlier by i915_gem_context_close(). 4788 * However, the object may also be bound into the global GTT (e.g. 4789 * older GPUs without per-process support, or for direct access through 4790 * the GTT either for the user or for scanout). Those VMA still need to 4791 * unbound now. 4792 */ 4793 4794 spin_lock(&i915->mm.free_lock); 4795 while ((freed = llist_del_all(&i915->mm.free_list))) { 4796 spin_unlock(&i915->mm.free_lock); 4797 4798 __i915_gem_free_objects(i915, freed); 4799 if (need_resched()) 4800 return; 4801 4802 spin_lock(&i915->mm.free_lock); 4803 } 4804 spin_unlock(&i915->mm.free_lock); 4805 } 4806 4807 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4808 { 4809 struct drm_i915_gem_object *obj = 4810 container_of(head, typeof(*obj), rcu); 4811 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4812 4813 /* 4814 * Since we require blocking on struct_mutex to unbind the freed 4815 * object from the GPU before releasing resources back to the 4816 * system, we can not do that directly from the RCU callback (which may 4817 * be a softirq context), but must instead then defer that work onto a 4818 * kthread. We use the RCU callback rather than move the freed object 4819 * directly onto the work queue so that we can mix between using the 4820 * worker and performing frees directly from subsequent allocations for 4821 * crude but effective memory throttling. 4822 */ 4823 if (llist_add(&obj->freed, &i915->mm.free_list)) 4824 queue_work(i915->wq, &i915->mm.free_work); 4825 } 4826 4827 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4828 { 4829 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4830 4831 if (obj->mm.quirked) 4832 __i915_gem_object_unpin_pages(obj); 4833 4834 if (discard_backing_storage(obj)) 4835 obj->mm.madv = I915_MADV_DONTNEED; 4836 4837 /* 4838 * Before we free the object, make sure any pure RCU-only 4839 * read-side critical sections are complete, e.g. 4840 * i915_gem_busy_ioctl(). For the corresponding synchronized 4841 * lookup see i915_gem_object_lookup_rcu(). 4842 */ 4843 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4844 } 4845 4846 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4847 { 4848 lockdep_assert_held(&obj->base.dev->struct_mutex); 4849 4850 if (!i915_gem_object_has_active_reference(obj) && 4851 i915_gem_object_is_active(obj)) 4852 i915_gem_object_set_active_reference(obj); 4853 else 4854 i915_gem_object_put(obj); 4855 } 4856 4857 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 4858 { 4859 struct i915_gem_context *kernel_context = i915->kernel_context; 4860 struct intel_engine_cs *engine; 4861 enum intel_engine_id id; 4862 4863 for_each_engine(engine, i915, id) { 4864 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); 4865 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4866 } 4867 } 4868 4869 void i915_gem_sanitize(struct drm_i915_private *i915) 4870 { 4871 if (i915_terminally_wedged(&i915->gpu_error)) { 4872 mutex_lock(&i915->drm.struct_mutex); 4873 i915_gem_unset_wedged(i915); 4874 mutex_unlock(&i915->drm.struct_mutex); 4875 } 4876 4877 /* 4878 * If we inherit context state from the BIOS or earlier occupants 4879 * of the GPU, the GPU may be in an inconsistent state when we 4880 * try to take over. The only way to remove the earlier state 4881 * is by resetting. 
However, resetting on earlier gen is tricky as
	 * it may impact the display and we are uncertain about the stability
	 * of the reset, so this could be applied to even earlier gen.
	 */
	if (INTEL_GEN(i915) >= 5) {
		int reset = intel_gpu_reset(i915, ALL_ENGINES);
		WARN_ON(reset && reset != -ENODEV);
	}
}

int i915_gem_suspend(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = &dev_priv->drm;
	int ret;

	intel_runtime_pm_get(dev_priv);
	intel_suspend_gt_powersave(dev_priv);

	mutex_lock(&dev->struct_mutex);

	/* We have to flush all the executing contexts to main memory so
	 * that they can be saved in the hibernation image. To ensure the last
	 * context image is coherent, we have to switch away from it. That
	 * leaves the dev_priv->kernel_context still active when
	 * we actually suspend, and its image in memory may not match the GPU
	 * state. Fortunately, the kernel_context is disposable and we do
	 * not rely on its state.
	 */
	if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
		ret = i915_gem_switch_to_kernel_context(dev_priv);
		if (ret)
			goto err_unlock;

		ret = i915_gem_wait_for_idle(dev_priv,
					     I915_WAIT_INTERRUPTIBLE |
					     I915_WAIT_LOCKED);
		if (ret && ret != -EIO)
			goto err_unlock;

		assert_kernel_context_is_current(dev_priv);
	}
	i915_gem_contexts_lost(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	intel_guc_suspend(dev_priv);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);

	/* As the idle_work is rearming if it detects a race, play safe and
	 * repeat the flush until it is definitely idle.
	 */
	drain_delayed_work(&dev_priv->gt.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->gt.awake);
	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */

	/*
	 * Neither the BIOS, ourselves nor any other kernel
	 * expects the system to be in execlists mode on startup,
	 * so we need to reset the GPU back to legacy mode. And the only
	 * known way to disable logical contexts is through a GPU reset.
	 *
	 * So in order to leave the system in a known default configuration,
	 * always reset the GPU upon unload and suspend. Afterwards we then
	 * clean up the GEM state tracking, flushing off the requests and
	 * leaving the system in a known idle state.
	 *
	 * Note that it is of the utmost importance that the GPU is idle and
	 * all stray writes are flushed *before* we dismantle the backing
	 * storage for the pinned objects.
	 *
	 * However, since we are uncertain that resetting the GPU on older
	 * machines is a good idea, we don't - just in case it leaves the
	 * machine in an unusable condition.
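 *
 * i915_gem_sanitize() below implements that policy and only performs
 * the reset on generations where it is believed to be safe.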
4960 */ 4961 i915_gem_sanitize(dev_priv); 4962 4963 intel_runtime_pm_put(dev_priv); 4964 return 0; 4965 4966 err_unlock: 4967 mutex_unlock(&dev->struct_mutex); 4968 intel_runtime_pm_put(dev_priv); 4969 return ret; 4970 } 4971 4972 void i915_gem_resume(struct drm_i915_private *i915) 4973 { 4974 WARN_ON(i915->gt.awake); 4975 4976 mutex_lock(&i915->drm.struct_mutex); 4977 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4978 4979 i915_gem_restore_gtt_mappings(i915); 4980 i915_gem_restore_fences(i915); 4981 4982 /* 4983 * As we didn't flush the kernel context before suspend, we cannot 4984 * guarantee that the context image is complete. So let's just reset 4985 * it and start again. 4986 */ 4987 i915->gt.resume(i915); 4988 4989 if (i915_gem_init_hw(i915)) 4990 goto err_wedged; 4991 4992 intel_guc_resume(i915); 4993 4994 /* Always reload a context for powersaving. */ 4995 if (i915_gem_switch_to_kernel_context(i915)) 4996 goto err_wedged; 4997 4998 out_unlock: 4999 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5000 mutex_unlock(&i915->drm.struct_mutex); 5001 return; 5002 5003 err_wedged: 5004 if (!i915_terminally_wedged(&i915->gpu_error)) { 5005 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5006 i915_gem_set_wedged(i915); 5007 } 5008 goto out_unlock; 5009 } 5010 5011 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5012 { 5013 if (INTEL_GEN(dev_priv) < 5 || 5014 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5015 return; 5016 5017 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5018 DISP_TILE_SURFACE_SWIZZLING); 5019 5020 if (IS_GEN5(dev_priv)) 5021 return; 5022 5023 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5024 if (IS_GEN6(dev_priv)) 5025 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5026 else if (IS_GEN7(dev_priv)) 5027 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5028 else if (IS_GEN8(dev_priv)) 5029 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5030 else 5031 BUG(); 5032 } 5033 5034 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5035 { 5036 I915_WRITE(RING_CTL(base), 0); 5037 I915_WRITE(RING_HEAD(base), 0); 5038 I915_WRITE(RING_TAIL(base), 0); 5039 I915_WRITE(RING_START(base), 0); 5040 } 5041 5042 static void init_unused_rings(struct drm_i915_private *dev_priv) 5043 { 5044 if (IS_I830(dev_priv)) { 5045 init_unused_ring(dev_priv, PRB1_BASE); 5046 init_unused_ring(dev_priv, SRB0_BASE); 5047 init_unused_ring(dev_priv, SRB1_BASE); 5048 init_unused_ring(dev_priv, SRB2_BASE); 5049 init_unused_ring(dev_priv, SRB3_BASE); 5050 } else if (IS_GEN2(dev_priv)) { 5051 init_unused_ring(dev_priv, SRB0_BASE); 5052 init_unused_ring(dev_priv, SRB1_BASE); 5053 } else if (IS_GEN3(dev_priv)) { 5054 init_unused_ring(dev_priv, PRB1_BASE); 5055 init_unused_ring(dev_priv, PRB2_BASE); 5056 } 5057 } 5058 5059 static int __i915_gem_restart_engines(void *data) 5060 { 5061 struct drm_i915_private *i915 = data; 5062 struct intel_engine_cs *engine; 5063 enum intel_engine_id id; 5064 int err; 5065 5066 for_each_engine(engine, i915, id) { 5067 err = engine->init_hw(engine); 5068 if (err) 5069 return err; 5070 } 5071 5072 return 0; 5073 } 5074 5075 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5076 { 5077 int ret; 5078 5079 dev_priv->gt.last_init_time = ktime_get(); 5080 5081 /* Double layer security blanket, see i915_gem_init() */ 5082 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5083 5084 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5085 
I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5086 5087 if (IS_HASWELL(dev_priv)) 5088 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5089 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5090 5091 if (HAS_PCH_NOP(dev_priv)) { 5092 if (IS_IVYBRIDGE(dev_priv)) { 5093 u32 temp = I915_READ(GEN7_MSG_CTL); 5094 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5095 I915_WRITE(GEN7_MSG_CTL, temp); 5096 } else if (INTEL_GEN(dev_priv) >= 7) { 5097 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5098 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5099 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5100 } 5101 } 5102 5103 i915_gem_init_swizzling(dev_priv); 5104 5105 /* 5106 * At least 830 can leave some of the unused rings 5107 * "active" (ie. head != tail) after resume which 5108 * will prevent c3 entry. Makes sure all unused rings 5109 * are totally idle. 5110 */ 5111 init_unused_rings(dev_priv); 5112 5113 BUG_ON(!dev_priv->kernel_context); 5114 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5115 ret = -EIO; 5116 goto out; 5117 } 5118 5119 ret = i915_ppgtt_init_hw(dev_priv); 5120 if (ret) { 5121 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5122 goto out; 5123 } 5124 5125 /* We can't enable contexts until all firmware is loaded */ 5126 ret = intel_uc_init_hw(dev_priv); 5127 if (ret) 5128 goto out; 5129 5130 intel_mocs_init_l3cc_table(dev_priv); 5131 5132 /* Only when the HW is re-initialised, can we replay the requests */ 5133 ret = __i915_gem_restart_engines(dev_priv); 5134 out: 5135 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5136 return ret; 5137 } 5138 5139 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5140 { 5141 struct i915_gem_context *ctx; 5142 struct intel_engine_cs *engine; 5143 enum intel_engine_id id; 5144 int err; 5145 5146 /* 5147 * As we reset the gpu during very early sanitisation, the current 5148 * register state on the GPU should reflect its defaults values. 5149 * We load a context onto the hw (with restore-inhibit), then switch 5150 * over to a second context to save that default register state. We 5151 * can then prime every new context with that state so they all start 5152 * from the same default HW values. 5153 */ 5154 5155 ctx = i915_gem_context_create_kernel(i915, 0); 5156 if (IS_ERR(ctx)) 5157 return PTR_ERR(ctx); 5158 5159 for_each_engine(engine, i915, id) { 5160 struct drm_i915_gem_request *rq; 5161 5162 rq = i915_gem_request_alloc(engine, ctx); 5163 if (IS_ERR(rq)) { 5164 err = PTR_ERR(rq); 5165 goto out_ctx; 5166 } 5167 5168 err = 0; 5169 if (engine->init_context) 5170 err = engine->init_context(rq); 5171 5172 __i915_add_request(rq, true); 5173 if (err) 5174 goto err_active; 5175 } 5176 5177 err = i915_gem_switch_to_kernel_context(i915); 5178 if (err) 5179 goto err_active; 5180 5181 err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); 5182 if (err) 5183 goto err_active; 5184 5185 assert_kernel_context_is_current(i915); 5186 5187 for_each_engine(engine, i915, id) { 5188 struct i915_vma *state; 5189 5190 state = ctx->engine[id].state; 5191 if (!state) 5192 continue; 5193 5194 /* 5195 * As we will hold a reference to the logical state, it will 5196 * not be torn down with the context, and importantly the 5197 * object will hold onto its vma (making it possible for a 5198 * stray GTT write to corrupt our defaults). Unmap the vma 5199 * from the GTT to prevent such accidents and reclaim the 5200 * space. 
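 *
 * (Only the GGTT binding is dropped here; the backing object itself
 * is kept alive below via i915_gem_object_get() so that the saved
 * default image survives.)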
5201 */ 5202 err = i915_vma_unbind(state); 5203 if (err) 5204 goto err_active; 5205 5206 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5207 if (err) 5208 goto err_active; 5209 5210 engine->default_state = i915_gem_object_get(state->obj); 5211 } 5212 5213 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5214 unsigned int found = intel_engines_has_context_isolation(i915); 5215 5216 /* 5217 * Make sure that classes with multiple engine instances all 5218 * share the same basic configuration. 5219 */ 5220 for_each_engine(engine, i915, id) { 5221 unsigned int bit = BIT(engine->uabi_class); 5222 unsigned int expected = engine->default_state ? bit : 0; 5223 5224 if ((found & bit) != expected) { 5225 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5226 engine->uabi_class, engine->name); 5227 } 5228 } 5229 } 5230 5231 out_ctx: 5232 i915_gem_context_set_closed(ctx); 5233 i915_gem_context_put(ctx); 5234 return err; 5235 5236 err_active: 5237 /* 5238 * If we have to abandon now, we expect the engines to be idle 5239 * and ready to be torn-down. First try to flush any remaining 5240 * request, ensure we are pointing at the kernel context and 5241 * then remove it. 5242 */ 5243 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5244 goto out_ctx; 5245 5246 if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) 5247 goto out_ctx; 5248 5249 i915_gem_contexts_lost(i915); 5250 goto out_ctx; 5251 } 5252 5253 int i915_gem_init(struct drm_i915_private *dev_priv) 5254 { 5255 int ret; 5256 5257 /* 5258 * We need to fallback to 4K pages since gvt gtt handling doesn't 5259 * support huge page entries - we will need to check either hypervisor 5260 * mm can support huge guest page or just do emulation in gvt. 5261 */ 5262 if (intel_vgpu_active(dev_priv)) 5263 mkwrite_device_info(dev_priv)->page_sizes = 5264 I915_GTT_PAGE_SIZE_4K; 5265 5266 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5267 5268 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5269 dev_priv->gt.resume = intel_lr_context_resume; 5270 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5271 } else { 5272 dev_priv->gt.resume = intel_legacy_submission_resume; 5273 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5274 } 5275 5276 ret = i915_gem_init_userptr(dev_priv); 5277 if (ret) 5278 return ret; 5279 5280 ret = intel_uc_init_misc(dev_priv); 5281 if (ret) 5282 return ret; 5283 5284 /* This is just a security blanket to placate dragons. 5285 * On some systems, we very sporadically observe that the first TLBs 5286 * used by the CS may be stale, despite us poking the TLB reset. If 5287 * we hold the forcewake during initialisation these problems 5288 * just magically go away. 
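 *
 * The matching intel_uncore_forcewake_put() is issued on both the
 * success and error paths once initialisation has finished.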
5289 */ 5290 mutex_lock(&dev_priv->drm.struct_mutex); 5291 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5292 5293 ret = i915_gem_init_ggtt(dev_priv); 5294 if (ret) { 5295 GEM_BUG_ON(ret == -EIO); 5296 goto err_unlock; 5297 } 5298 5299 ret = i915_gem_contexts_init(dev_priv); 5300 if (ret) { 5301 GEM_BUG_ON(ret == -EIO); 5302 goto err_ggtt; 5303 } 5304 5305 ret = intel_engines_init(dev_priv); 5306 if (ret) { 5307 GEM_BUG_ON(ret == -EIO); 5308 goto err_context; 5309 } 5310 5311 intel_init_gt_powersave(dev_priv); 5312 5313 ret = intel_uc_init(dev_priv); 5314 if (ret) 5315 goto err_pm; 5316 5317 ret = i915_gem_init_hw(dev_priv); 5318 if (ret) 5319 goto err_uc_init; 5320 5321 /* 5322 * Despite its name intel_init_clock_gating applies both display 5323 * clock gating workarounds; GT mmio workarounds and the occasional 5324 * GT power context workaround. Worse, sometimes it includes a context 5325 * register workaround which we need to apply before we record the 5326 * default HW state for all contexts. 5327 * 5328 * FIXME: break up the workarounds and apply them at the right time! 5329 */ 5330 intel_init_clock_gating(dev_priv); 5331 5332 ret = __intel_engines_record_defaults(dev_priv); 5333 if (ret) 5334 goto err_init_hw; 5335 5336 if (i915_inject_load_failure()) { 5337 ret = -ENODEV; 5338 goto err_init_hw; 5339 } 5340 5341 if (i915_inject_load_failure()) { 5342 ret = -EIO; 5343 goto err_init_hw; 5344 } 5345 5346 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5347 mutex_unlock(&dev_priv->drm.struct_mutex); 5348 5349 return 0; 5350 5351 /* 5352 * Unwinding is complicated by that we want to handle -EIO to mean 5353 * disable GPU submission but keep KMS alive. We want to mark the 5354 * HW as irrevisibly wedged, but keep enough state around that the 5355 * driver doesn't explode during runtime. 5356 */ 5357 err_init_hw: 5358 i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); 5359 i915_gem_contexts_lost(dev_priv); 5360 intel_uc_fini_hw(dev_priv); 5361 err_uc_init: 5362 intel_uc_fini(dev_priv); 5363 err_pm: 5364 if (ret != -EIO) { 5365 intel_cleanup_gt_powersave(dev_priv); 5366 i915_gem_cleanup_engines(dev_priv); 5367 } 5368 err_context: 5369 if (ret != -EIO) 5370 i915_gem_contexts_fini(dev_priv); 5371 err_ggtt: 5372 err_unlock: 5373 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5374 mutex_unlock(&dev_priv->drm.struct_mutex); 5375 5376 intel_uc_fini_misc(dev_priv); 5377 5378 if (ret != -EIO) 5379 i915_gem_cleanup_userptr(dev_priv); 5380 5381 if (ret == -EIO) { 5382 /* 5383 * Allow engine initialisation to fail by marking the GPU as 5384 * wedged. But we only want to do this where the GPU is angry, 5385 * for all other failure, such as an allocation failure, bail. 
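 *
 * Concretely, the block below turns -EIO into "mark the GPU wedged and
 * report success": modesetting keeps running while GEM submission
 * fails fast, whereas any other error is propagated as a real probe
 * failure.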
5386 */ 5387 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5388 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5389 i915_gem_set_wedged(dev_priv); 5390 } 5391 ret = 0; 5392 } 5393 5394 i915_gem_drain_freed_objects(dev_priv); 5395 return ret; 5396 } 5397 5398 void i915_gem_init_mmio(struct drm_i915_private *i915) 5399 { 5400 i915_gem_sanitize(i915); 5401 } 5402 5403 void 5404 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5405 { 5406 struct intel_engine_cs *engine; 5407 enum intel_engine_id id; 5408 5409 for_each_engine(engine, dev_priv, id) 5410 dev_priv->gt.cleanup_engine(engine); 5411 } 5412 5413 void 5414 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5415 { 5416 int i; 5417 5418 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5419 !IS_CHERRYVIEW(dev_priv)) 5420 dev_priv->num_fence_regs = 32; 5421 else if (INTEL_INFO(dev_priv)->gen >= 4 || 5422 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5423 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5424 dev_priv->num_fence_regs = 16; 5425 else 5426 dev_priv->num_fence_regs = 8; 5427 5428 if (intel_vgpu_active(dev_priv)) 5429 dev_priv->num_fence_regs = 5430 I915_READ(vgtif_reg(avail_rs.fence_num)); 5431 5432 /* Initialize fence registers to zero */ 5433 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5434 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5435 5436 fence->i915 = dev_priv; 5437 fence->id = i; 5438 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5439 } 5440 i915_gem_restore_fences(dev_priv); 5441 5442 i915_gem_detect_bit_6_swizzle(dev_priv); 5443 } 5444 5445 static void i915_gem_init__mm(struct drm_i915_private *i915) 5446 { 5447 spin_lock_init(&i915->mm.object_stat_lock); 5448 spin_lock_init(&i915->mm.obj_lock); 5449 spin_lock_init(&i915->mm.free_lock); 5450 5451 init_llist_head(&i915->mm.free_list); 5452 5453 INIT_LIST_HEAD(&i915->mm.unbound_list); 5454 INIT_LIST_HEAD(&i915->mm.bound_list); 5455 INIT_LIST_HEAD(&i915->mm.fence_list); 5456 INIT_LIST_HEAD(&i915->mm.userfault_list); 5457 5458 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5459 } 5460 5461 int 5462 i915_gem_load_init(struct drm_i915_private *dev_priv) 5463 { 5464 int err = -ENOMEM; 5465 5466 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5467 if (!dev_priv->objects) 5468 goto err_out; 5469 5470 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5471 if (!dev_priv->vmas) 5472 goto err_objects; 5473 5474 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5475 if (!dev_priv->luts) 5476 goto err_vmas; 5477 5478 dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, 5479 SLAB_HWCACHE_ALIGN | 5480 SLAB_RECLAIM_ACCOUNT | 5481 SLAB_TYPESAFE_BY_RCU); 5482 if (!dev_priv->requests) 5483 goto err_luts; 5484 5485 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5486 SLAB_HWCACHE_ALIGN | 5487 SLAB_RECLAIM_ACCOUNT); 5488 if (!dev_priv->dependencies) 5489 goto err_requests; 5490 5491 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5492 if (!dev_priv->priorities) 5493 goto err_dependencies; 5494 5495 mutex_lock(&dev_priv->drm.struct_mutex); 5496 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5497 err = i915_gem_timeline_init__global(dev_priv); 5498 mutex_unlock(&dev_priv->drm.struct_mutex); 5499 if (err) 5500 goto err_priorities; 5501 5502 i915_gem_init__mm(dev_priv); 5503 5504 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5505 i915_gem_retire_work_handler); 5506 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5507 i915_gem_idle_work_handler); 5508 
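	/*
	 * gt.retire_work and gt.idle_work (set up above) drive background
	 * request retirement and parking of the GPU once it falls idle;
	 * both are explicitly quiesced again in i915_gem_suspend().
	 */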
init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5509 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5510 5511 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5512 5513 spin_lock_init(&dev_priv->fb_tracking.lock); 5514 5515 err = i915_gemfs_init(dev_priv); 5516 if (err) 5517 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5518 5519 return 0; 5520 5521 err_priorities: 5522 kmem_cache_destroy(dev_priv->priorities); 5523 err_dependencies: 5524 kmem_cache_destroy(dev_priv->dependencies); 5525 err_requests: 5526 kmem_cache_destroy(dev_priv->requests); 5527 err_luts: 5528 kmem_cache_destroy(dev_priv->luts); 5529 err_vmas: 5530 kmem_cache_destroy(dev_priv->vmas); 5531 err_objects: 5532 kmem_cache_destroy(dev_priv->objects); 5533 err_out: 5534 return err; 5535 } 5536 5537 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) 5538 { 5539 i915_gem_drain_freed_objects(dev_priv); 5540 WARN_ON(!llist_empty(&dev_priv->mm.free_list)); 5541 WARN_ON(dev_priv->mm.object_count); 5542 5543 mutex_lock(&dev_priv->drm.struct_mutex); 5544 i915_gem_timeline_fini(&dev_priv->gt.global_timeline); 5545 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5546 mutex_unlock(&dev_priv->drm.struct_mutex); 5547 5548 kmem_cache_destroy(dev_priv->priorities); 5549 kmem_cache_destroy(dev_priv->dependencies); 5550 kmem_cache_destroy(dev_priv->requests); 5551 kmem_cache_destroy(dev_priv->luts); 5552 kmem_cache_destroy(dev_priv->vmas); 5553 kmem_cache_destroy(dev_priv->objects); 5554 5555 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5556 rcu_barrier(); 5557 5558 i915_gemfs_fini(dev_priv); 5559 } 5560 5561 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5562 { 5563 /* Discard all purgeable objects, let userspace recover those as 5564 * required after resuming. 5565 */ 5566 i915_gem_shrink_all(dev_priv); 5567 5568 return 0; 5569 } 5570 5571 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5572 { 5573 struct drm_i915_gem_object *obj; 5574 struct list_head *phases[] = { 5575 &dev_priv->mm.unbound_list, 5576 &dev_priv->mm.bound_list, 5577 NULL 5578 }, **p; 5579 5580 /* Called just before we write the hibernation image. 5581 * 5582 * We need to update the domain tracking to reflect that the CPU 5583 * will be accessing all the pages to create and restore from the 5584 * hibernation, and so upon restoration those pages will be in the 5585 * CPU domain. 5586 * 5587 * To make sure the hibernation image contains the latest state, 5588 * we update that state just before writing out the image. 5589 * 5590 * To try and reduce the hibernation image, we manually shrink 5591 * the objects as well, see i915_gem_freeze() 5592 */ 5593 5594 i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); 5595 i915_gem_drain_freed_objects(dev_priv); 5596 5597 spin_lock(&dev_priv->mm.obj_lock); 5598 for (p = phases; *p; p++) { 5599 list_for_each_entry(obj, *p, mm.link) 5600 __start_cpu_write(obj); 5601 } 5602 spin_unlock(&dev_priv->mm.obj_lock); 5603 5604 return 0; 5605 } 5606 5607 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5608 { 5609 struct drm_i915_file_private *file_priv = file->driver_priv; 5610 struct drm_i915_gem_request *request; 5611 5612 /* Clean up our request list when the client is going away, so that 5613 * later retire_requests won't dereference our soon-to-be-gone 5614 * file_priv. 
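 *
 * Note that the outstanding requests are not cancelled; they continue
 * to execute and retire as normal, we merely sever their backpointer
 * to the client being destroyed.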
5615 */ 5616 spin_lock(&file_priv->mm.lock); 5617 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 5618 request->file_priv = NULL; 5619 spin_unlock(&file_priv->mm.lock); 5620 } 5621 5622 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 5623 { 5624 struct drm_i915_file_private *file_priv; 5625 int ret; 5626 5627 DRM_DEBUG("\n"); 5628 5629 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5630 if (!file_priv) 5631 return -ENOMEM; 5632 5633 file->driver_priv = file_priv; 5634 file_priv->dev_priv = i915; 5635 file_priv->file = file; 5636 5637 spin_lock_init(&file_priv->mm.lock); 5638 INIT_LIST_HEAD(&file_priv->mm.request_list); 5639 5640 file_priv->bsd_engine = -1; 5641 5642 ret = i915_gem_context_open(i915, file); 5643 if (ret) 5644 kfree(file_priv); 5645 5646 return ret; 5647 } 5648 5649 /** 5650 * i915_gem_track_fb - update frontbuffer tracking 5651 * @old: current GEM buffer for the frontbuffer slots 5652 * @new: new GEM buffer for the frontbuffer slots 5653 * @frontbuffer_bits: bitmask of frontbuffer slots 5654 * 5655 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5656 * from @old and setting them in @new. Both @old and @new can be NULL. 5657 */ 5658 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5659 struct drm_i915_gem_object *new, 5660 unsigned frontbuffer_bits) 5661 { 5662 /* Control of individual bits within the mask are guarded by 5663 * the owning plane->mutex, i.e. we can never see concurrent 5664 * manipulation of individual bits. But since the bitfield as a whole 5665 * is updated using RMW, we need to use atomics in order to update 5666 * the bits. 5667 */ 5668 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5669 sizeof(atomic_t) * BITS_PER_BYTE); 5670 5671 if (old) { 5672 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5673 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5674 } 5675 5676 if (new) { 5677 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5678 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5679 } 5680 } 5681 5682 /* Allocate a new GEM object and fill it with the supplied data */ 5683 struct drm_i915_gem_object * 5684 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5685 const void *data, size_t size) 5686 { 5687 struct drm_i915_gem_object *obj; 5688 struct file *file; 5689 size_t offset; 5690 int err; 5691 5692 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5693 if (IS_ERR(obj)) 5694 return obj; 5695 5696 GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 5697 5698 file = obj->base.filp; 5699 offset = 0; 5700 do { 5701 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5702 struct page *page; 5703 void *pgdata, *vaddr; 5704 5705 err = pagecache_write_begin(file, file->f_mapping, 5706 offset, len, 0, 5707 &page, &pgdata); 5708 if (err < 0) 5709 goto fail; 5710 5711 vaddr = kmap(page); 5712 memcpy(vaddr, data, len); 5713 kunmap(page); 5714 5715 err = pagecache_write_end(file, file->f_mapping, 5716 offset, len, len, 5717 page, pgdata); 5718 if (err < 0) 5719 goto fail; 5720 5721 size -= len; 5722 data += len; 5723 offset += len; 5724 } while (size); 5725 5726 return obj; 5727 5728 fail: 5729 i915_gem_object_put(obj); 5730 return ERR_PTR(err); 5731 } 5732 5733 struct scatterlist * 5734 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5735 unsigned int n, 5736 unsigned int *offset) 5737 { 5738 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
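/*
 * Illustrative sketch, not part of the original file: walking an object's
 * backing store page by page. Because i915_gem_object_get_sg() caches its
 * position and fills the radixtree as it goes, a forward walk like this is
 * amortized O(1) per page. The pages must remain pinned for the duration,
 * e.g. via i915_gem_object_pin_pages(). The function name and the callback
 * are hypothetical.
 */
static inline void
example_for_each_dma_page(struct drm_i915_gem_object *obj,
			  void (*fn)(dma_addr_t addr, void *data),
			  void *data)
{
	unsigned long n, npages = obj->base.size >> PAGE_SHIFT;

	/* Sequential lookups reuse the cached iterator; random access
	 * falls back to the radixtree populated by earlier walks.
	 */
	for (n = 0; n < npages; n++)
		fn(i915_gem_object_get_dma_address(obj, n), data);
}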
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = fetch_and_zero(&obj->mm.pages);
	if (pages) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		__i915_gem_object_reset_page_iter(obj);

		spin_lock(&i915->mm.obj_lock);
		list_del(&obj->mm.link);
		spin_unlock(&i915->mm.obj_lock);
	}

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	obj->mm.pages = pages;
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif
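/*
 * Illustrative sketch, not part of the original file: converting an object
 * to the physically contiguous backing needed by hardware that reads from a
 * single physical address (e.g. legacy cursor planes). The wrapper name is
 * hypothetical; @align is whatever alignment the consumer of the physical
 * address requires.
 */
static inline int
example_make_phys(struct drm_i915_gem_object *obj, int align)
{
	/* Fails with -EINVAL for objects that do not use the default shmemfs
	 * backing, and with -EFAULT/-EBUSY if the pages are no longer
	 * WILLNEED, are quirked, or are currently mapped; on success the
	 * physical pages stay pinned until the object is released.
	 */
	return i915_gem_object_attach_phys(obj, align);
}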