1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "i915_gemfs.h" 39 #include <linux/dma-fence-array.h> 40 #include <linux/kthread.h> 41 #include <linux/reservation.h> 42 #include <linux/shmem_fs.h> 43 #include <linux/slab.h> 44 #include <linux/stop_machine.h> 45 #include <linux/swap.h> 46 #include <linux/pci.h> 47 #include <linux/dma-buf.h> 48 49 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 50 51 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 52 { 53 if (obj->cache_dirty) 54 return false; 55 56 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 57 return true; 58 59 return obj->pin_global; /* currently in use by HW, keep flushed */ 60 } 61 62 static int 63 insert_mappable_node(struct i915_ggtt *ggtt, 64 struct drm_mm_node *node, u32 size) 65 { 66 memset(node, 0, sizeof(*node)); 67 return drm_mm_insert_node_in_range(&ggtt->base.mm, node, 68 size, 0, I915_COLOR_UNEVICTABLE, 69 0, ggtt->mappable_end, 70 DRM_MM_INSERT_LOW); 71 } 72 73 static void 74 remove_mappable_node(struct drm_mm_node *node) 75 { 76 drm_mm_remove_node(node); 77 } 78 79 /* some bookkeeping */ 80 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 81 u64 size) 82 { 83 spin_lock(&dev_priv->mm.object_stat_lock); 84 dev_priv->mm.object_count++; 85 dev_priv->mm.object_memory += size; 86 spin_unlock(&dev_priv->mm.object_stat_lock); 87 } 88 89 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 90 u64 size) 91 { 92 spin_lock(&dev_priv->mm.object_stat_lock); 93 dev_priv->mm.object_count--; 94 dev_priv->mm.object_memory -= size; 95 spin_unlock(&dev_priv->mm.object_stat_lock); 96 } 97 98 static int 99 i915_gem_wait_for_error(struct i915_gpu_error *error) 100 { 101 int ret; 102 103 might_sleep(); 104 105 /* 106 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 107 * userspace. If it takes that long something really bad is going on and 108 * we should simply try to bail out and fail as gracefully as possible. 
109 */ 110 ret = wait_event_interruptible_timeout(error->reset_queue, 111 !i915_reset_backoff(error), 112 I915_RESET_TIMEOUT); 113 if (ret == 0) { 114 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 115 return -EIO; 116 } else if (ret < 0) { 117 return ret; 118 } else { 119 return 0; 120 } 121 } 122 123 int i915_mutex_lock_interruptible(struct drm_device *dev) 124 { 125 struct drm_i915_private *dev_priv = to_i915(dev); 126 int ret; 127 128 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 129 if (ret) 130 return ret; 131 132 ret = mutex_lock_interruptible(&dev->struct_mutex); 133 if (ret) 134 return ret; 135 136 return 0; 137 } 138 139 int 140 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 141 struct drm_file *file) 142 { 143 struct drm_i915_private *dev_priv = to_i915(dev); 144 struct i915_ggtt *ggtt = &dev_priv->ggtt; 145 struct drm_i915_gem_get_aperture *args = data; 146 struct i915_vma *vma; 147 u64 pinned; 148 149 pinned = ggtt->base.reserved; 150 mutex_lock(&dev->struct_mutex); 151 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 152 if (i915_vma_is_pinned(vma)) 153 pinned += vma->node.size; 154 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 155 if (i915_vma_is_pinned(vma)) 156 pinned += vma->node.size; 157 mutex_unlock(&dev->struct_mutex); 158 159 args->aper_size = ggtt->base.total; 160 args->aper_available_size = args->aper_size - pinned; 161 162 return 0; 163 } 164 165 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 166 { 167 struct address_space *mapping = obj->base.filp->f_mapping; 168 drm_dma_handle_t *phys; 169 struct sg_table *st; 170 struct scatterlist *sg; 171 char *vaddr; 172 int i; 173 int err; 174 175 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 176 return -EINVAL; 177 178 /* Always aligning to the object size, allows a single allocation 179 * to handle all possible callers, and given typical object sizes, 180 * the alignment of the buddy allocation will naturally match. 
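 * The allocation below therefore rounds both the size and the alignment
 * up to the next power of two before calling drm_pci_alloc().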
181 */ 182 phys = drm_pci_alloc(obj->base.dev, 183 roundup_pow_of_two(obj->base.size), 184 roundup_pow_of_two(obj->base.size)); 185 if (!phys) 186 return -ENOMEM; 187 188 vaddr = phys->vaddr; 189 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 190 struct page *page; 191 char *src; 192 193 page = shmem_read_mapping_page(mapping, i); 194 if (IS_ERR(page)) { 195 err = PTR_ERR(page); 196 goto err_phys; 197 } 198 199 src = kmap_atomic(page); 200 memcpy(vaddr, src, PAGE_SIZE); 201 drm_clflush_virt_range(vaddr, PAGE_SIZE); 202 kunmap_atomic(src); 203 204 put_page(page); 205 vaddr += PAGE_SIZE; 206 } 207 208 i915_gem_chipset_flush(to_i915(obj->base.dev)); 209 210 st = kmalloc(sizeof(*st), GFP_KERNEL); 211 if (!st) { 212 err = -ENOMEM; 213 goto err_phys; 214 } 215 216 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 217 kfree(st); 218 err = -ENOMEM; 219 goto err_phys; 220 } 221 222 sg = st->sgl; 223 sg->offset = 0; 224 sg->length = obj->base.size; 225 226 sg_dma_address(sg) = phys->busaddr; 227 sg_dma_len(sg) = obj->base.size; 228 229 obj->phys_handle = phys; 230 231 __i915_gem_object_set_pages(obj, st, sg->length); 232 233 return 0; 234 235 err_phys: 236 drm_pci_free(obj->base.dev, phys); 237 238 return err; 239 } 240 241 static void __start_cpu_write(struct drm_i915_gem_object *obj) 242 { 243 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 244 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 245 if (cpu_write_needs_clflush(obj)) 246 obj->cache_dirty = true; 247 } 248 249 static void 250 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 251 struct sg_table *pages, 252 bool needs_clflush) 253 { 254 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 255 256 if (obj->mm.madv == I915_MADV_DONTNEED) 257 obj->mm.dirty = false; 258 259 if (needs_clflush && 260 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && 261 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 262 drm_clflush_sg(pages); 263 264 __start_cpu_write(obj); 265 } 266 267 static void 268 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 269 struct sg_table *pages) 270 { 271 __i915_gem_object_release_shmem(obj, pages, false); 272 273 if (obj->mm.dirty) { 274 struct address_space *mapping = obj->base.filp->f_mapping; 275 char *vaddr = obj->phys_handle->vaddr; 276 int i; 277 278 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 279 struct page *page; 280 char *dst; 281 282 page = shmem_read_mapping_page(mapping, i); 283 if (IS_ERR(page)) 284 continue; 285 286 dst = kmap_atomic(page); 287 drm_clflush_virt_range(vaddr, PAGE_SIZE); 288 memcpy(dst, vaddr, PAGE_SIZE); 289 kunmap_atomic(dst); 290 291 set_page_dirty(page); 292 if (obj->mm.madv == I915_MADV_WILLNEED) 293 mark_page_accessed(page); 294 put_page(page); 295 vaddr += PAGE_SIZE; 296 } 297 obj->mm.dirty = false; 298 } 299 300 sg_free_table(pages); 301 kfree(pages); 302 303 drm_pci_free(obj->base.dev, obj->phys_handle); 304 } 305 306 static void 307 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 308 { 309 i915_gem_object_unpin_pages(obj); 310 } 311 312 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 313 .get_pages = i915_gem_object_get_pages_phys, 314 .put_pages = i915_gem_object_put_pages_phys, 315 .release = i915_gem_object_release_phys, 316 }; 317 318 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 319 320 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 321 { 322 struct i915_vma *vma; 323 LIST_HEAD(still_in_list); 324 int ret; 325 326 lockdep_assert_held(&obj->base.dev->struct_mutex); 327 328 /* 
Closed vma are removed from the obj->vma_list - but they may 329 * still have an active binding on the object. To remove those we 330 * must wait for all rendering to complete to the object (as unbinding 331 * must anyway), and retire the requests. 332 */ 333 ret = i915_gem_object_wait(obj, 334 I915_WAIT_INTERRUPTIBLE | 335 I915_WAIT_LOCKED | 336 I915_WAIT_ALL, 337 MAX_SCHEDULE_TIMEOUT, 338 NULL); 339 if (ret) 340 return ret; 341 342 i915_gem_retire_requests(to_i915(obj->base.dev)); 343 344 while ((vma = list_first_entry_or_null(&obj->vma_list, 345 struct i915_vma, 346 obj_link))) { 347 list_move_tail(&vma->obj_link, &still_in_list); 348 ret = i915_vma_unbind(vma); 349 if (ret) 350 break; 351 } 352 list_splice(&still_in_list, &obj->vma_list); 353 354 return ret; 355 } 356 357 static long 358 i915_gem_object_wait_fence(struct dma_fence *fence, 359 unsigned int flags, 360 long timeout, 361 struct intel_rps_client *rps_client) 362 { 363 struct drm_i915_gem_request *rq; 364 365 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 366 367 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 368 return timeout; 369 370 if (!dma_fence_is_i915(fence)) 371 return dma_fence_wait_timeout(fence, 372 flags & I915_WAIT_INTERRUPTIBLE, 373 timeout); 374 375 rq = to_request(fence); 376 if (i915_gem_request_completed(rq)) 377 goto out; 378 379 /* This client is about to stall waiting for the GPU. In many cases 380 * this is undesirable and limits the throughput of the system, as 381 * many clients cannot continue processing user input/output whilst 382 * blocked. RPS autotuning may take tens of milliseconds to respond 383 * to the GPU load and thus incurs additional latency for the client. 384 * We can circumvent that by promoting the GPU frequency to maximum 385 * before we wait. This makes the GPU throttle up much more quickly 386 * (good for benchmarks and user experience, e.g. window animations), 387 * but at a cost of spending more power processing the workload 388 * (bad for battery). Not all clients even want their results 389 * immediately and for them we should just let the GPU select its own 390 * frequency to maximise efficiency. To prevent a single client from 391 * forcing the clocks too high for the whole system, we only allow 392 * each client to waitboost once in a busy period. 
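 * (The boost itself is applied below via gen6_rps_boost() on gen6+
 * hardware; earlier platforms simply skip the waitboost.)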
 */
	if (rps_client) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
		else
			rps_client = NULL;
	}

	timeout = i915_wait_request(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
		i915_gem_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0) {
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);
		prune_fences = timeout >= 0;
	}

	dma_fence_put(excl);

	/* Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence, int prio)
{
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;

	if (!dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;
	if (!engine->schedule)
		return;

	engine->schedule(rq, prio);
}

static void fence_set_priority(struct dma_fence *fence, int prio)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], prio);
	} else {
		__fence_set_priority(fence, prio);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      int prio)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], prio);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, prio);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout:
how long to wait 541 * @rps_client: client (user process) to charge for any waitboosting 542 */ 543 int 544 i915_gem_object_wait(struct drm_i915_gem_object *obj, 545 unsigned int flags, 546 long timeout, 547 struct intel_rps_client *rps_client) 548 { 549 might_sleep(); 550 #if IS_ENABLED(CONFIG_LOCKDEP) 551 GEM_BUG_ON(debug_locks && 552 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 553 !!(flags & I915_WAIT_LOCKED)); 554 #endif 555 GEM_BUG_ON(timeout < 0); 556 557 timeout = i915_gem_object_wait_reservation(obj->resv, 558 flags, timeout, 559 rps_client); 560 return timeout < 0 ? timeout : 0; 561 } 562 563 static struct intel_rps_client *to_rps_client(struct drm_file *file) 564 { 565 struct drm_i915_file_private *fpriv = file->driver_priv; 566 567 return &fpriv->rps_client; 568 } 569 570 static int 571 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 572 struct drm_i915_gem_pwrite *args, 573 struct drm_file *file) 574 { 575 void *vaddr = obj->phys_handle->vaddr + args->offset; 576 char __user *user_data = u64_to_user_ptr(args->data_ptr); 577 578 /* We manually control the domain here and pretend that it 579 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 580 */ 581 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 582 if (copy_from_user(vaddr, user_data, args->size)) 583 return -EFAULT; 584 585 drm_clflush_virt_range(vaddr, args->size); 586 i915_gem_chipset_flush(to_i915(obj->base.dev)); 587 588 intel_fb_obj_flush(obj, ORIGIN_CPU); 589 return 0; 590 } 591 592 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 593 { 594 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 595 } 596 597 void i915_gem_object_free(struct drm_i915_gem_object *obj) 598 { 599 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 600 kmem_cache_free(dev_priv->objects, obj); 601 } 602 603 static int 604 i915_gem_create(struct drm_file *file, 605 struct drm_i915_private *dev_priv, 606 uint64_t size, 607 uint32_t *handle_p) 608 { 609 struct drm_i915_gem_object *obj; 610 int ret; 611 u32 handle; 612 613 size = roundup(size, PAGE_SIZE); 614 if (size == 0) 615 return -EINVAL; 616 617 /* Allocate the new object */ 618 obj = i915_gem_object_create(dev_priv, size); 619 if (IS_ERR(obj)) 620 return PTR_ERR(obj); 621 622 ret = drm_gem_handle_create(file, &obj->base, &handle); 623 /* drop reference from allocate - handle holds it now */ 624 i915_gem_object_put(obj); 625 if (ret) 626 return ret; 627 628 *handle_p = handle; 629 return 0; 630 } 631 632 int 633 i915_gem_dumb_create(struct drm_file *file, 634 struct drm_device *dev, 635 struct drm_mode_create_dumb *args) 636 { 637 /* have to work out size/pitch and return them */ 638 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 639 args->size = args->pitch * args->height; 640 return i915_gem_create(file, to_i915(dev), 641 args->size, &args->handle); 642 } 643 644 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 645 { 646 return !(obj->cache_level == I915_CACHE_NONE || 647 obj->cache_level == I915_CACHE_WT); 648 } 649 650 /** 651 * Creates a new mm object and returns a handle to it. 
652 * @dev: drm device pointer 653 * @data: ioctl data blob 654 * @file: drm file pointer 655 */ 656 int 657 i915_gem_create_ioctl(struct drm_device *dev, void *data, 658 struct drm_file *file) 659 { 660 struct drm_i915_private *dev_priv = to_i915(dev); 661 struct drm_i915_gem_create *args = data; 662 663 i915_gem_flush_free_objects(dev_priv); 664 665 return i915_gem_create(file, dev_priv, 666 args->size, &args->handle); 667 } 668 669 static inline enum fb_op_origin 670 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 671 { 672 return (domain == I915_GEM_DOMAIN_GTT ? 673 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 674 } 675 676 static void 677 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 678 { 679 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 680 681 if (!(obj->base.write_domain & flush_domains)) 682 return; 683 684 /* No actual flushing is required for the GTT write domain. Writes 685 * to it "immediately" go to main memory as far as we know, so there's 686 * no chipset flush. It also doesn't land in render cache. 687 * 688 * However, we do have to enforce the order so that all writes through 689 * the GTT land before any writes to the device, such as updates to 690 * the GATT itself. 691 * 692 * We also have to wait a bit for the writes to land from the GTT. 693 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 694 * timing. This issue has only been observed when switching quickly 695 * between GTT writes and CPU reads from inside the kernel on recent hw, 696 * and it appears to only affect discrete GTT blocks (i.e. on LLC 697 * system agents we cannot reproduce this behaviour). 698 */ 699 wmb(); 700 701 switch (obj->base.write_domain) { 702 case I915_GEM_DOMAIN_GTT: 703 if (!HAS_LLC(dev_priv)) { 704 intel_runtime_pm_get(dev_priv); 705 spin_lock_irq(&dev_priv->uncore.lock); 706 POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base)); 707 spin_unlock_irq(&dev_priv->uncore.lock); 708 intel_runtime_pm_put(dev_priv); 709 } 710 711 intel_fb_obj_flush(obj, 712 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 713 break; 714 715 case I915_GEM_DOMAIN_CPU: 716 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 717 break; 718 719 case I915_GEM_DOMAIN_RENDER: 720 if (gpu_write_needs_clflush(obj)) 721 obj->cache_dirty = true; 722 break; 723 } 724 725 obj->base.write_domain = 0; 726 } 727 728 static inline int 729 __copy_to_user_swizzled(char __user *cpu_vaddr, 730 const char *gpu_vaddr, int gpu_offset, 731 int length) 732 { 733 int ret, cpu_offset = 0; 734 735 while (length > 0) { 736 int cacheline_end = ALIGN(gpu_offset + 1, 64); 737 int this_length = min(cacheline_end - gpu_offset, length); 738 int swizzled_gpu_offset = gpu_offset ^ 64; 739 740 ret = __copy_to_user(cpu_vaddr + cpu_offset, 741 gpu_vaddr + swizzled_gpu_offset, 742 this_length); 743 if (ret) 744 return ret + length; 745 746 cpu_offset += this_length; 747 gpu_offset += this_length; 748 length -= this_length; 749 } 750 751 return 0; 752 } 753 754 static inline int 755 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 756 const char __user *cpu_vaddr, 757 int length) 758 { 759 int ret, cpu_offset = 0; 760 761 while (length > 0) { 762 int cacheline_end = ALIGN(gpu_offset + 1, 64); 763 int this_length = min(cacheline_end - gpu_offset, length); 764 int swizzled_gpu_offset = gpu_offset ^ 64; 765 766 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 767 cpu_vaddr + cpu_offset, 768 this_length); 769 if (ret) 770 return ret + length; 771 772 
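		/* Step past the chunk just copied; the swizzled offset
		 * (gpu_offset ^ 64) is recomputed on the next iteration.
		 */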
cpu_offset += this_length; 773 gpu_offset += this_length; 774 length -= this_length; 775 } 776 777 return 0; 778 } 779 780 /* 781 * Pins the specified object's pages and synchronizes the object with 782 * GPU accesses. Sets needs_clflush to non-zero if the caller should 783 * flush the object from the CPU cache. 784 */ 785 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 786 unsigned int *needs_clflush) 787 { 788 int ret; 789 790 lockdep_assert_held(&obj->base.dev->struct_mutex); 791 792 *needs_clflush = 0; 793 if (!i915_gem_object_has_struct_page(obj)) 794 return -ENODEV; 795 796 ret = i915_gem_object_wait(obj, 797 I915_WAIT_INTERRUPTIBLE | 798 I915_WAIT_LOCKED, 799 MAX_SCHEDULE_TIMEOUT, 800 NULL); 801 if (ret) 802 return ret; 803 804 ret = i915_gem_object_pin_pages(obj); 805 if (ret) 806 return ret; 807 808 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 809 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 810 ret = i915_gem_object_set_to_cpu_domain(obj, false); 811 if (ret) 812 goto err_unpin; 813 else 814 goto out; 815 } 816 817 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 818 819 /* If we're not in the cpu read domain, set ourself into the gtt 820 * read domain and manually flush cachelines (if required). This 821 * optimizes for the case when the gpu will dirty the data 822 * anyway again before the next pread happens. 823 */ 824 if (!obj->cache_dirty && 825 !(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 826 *needs_clflush = CLFLUSH_BEFORE; 827 828 out: 829 /* return with the pages pinned */ 830 return 0; 831 832 err_unpin: 833 i915_gem_object_unpin_pages(obj); 834 return ret; 835 } 836 837 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 838 unsigned int *needs_clflush) 839 { 840 int ret; 841 842 lockdep_assert_held(&obj->base.dev->struct_mutex); 843 844 *needs_clflush = 0; 845 if (!i915_gem_object_has_struct_page(obj)) 846 return -ENODEV; 847 848 ret = i915_gem_object_wait(obj, 849 I915_WAIT_INTERRUPTIBLE | 850 I915_WAIT_LOCKED | 851 I915_WAIT_ALL, 852 MAX_SCHEDULE_TIMEOUT, 853 NULL); 854 if (ret) 855 return ret; 856 857 ret = i915_gem_object_pin_pages(obj); 858 if (ret) 859 return ret; 860 861 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 862 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 863 ret = i915_gem_object_set_to_cpu_domain(obj, true); 864 if (ret) 865 goto err_unpin; 866 else 867 goto out; 868 } 869 870 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 871 872 /* If we're not in the cpu write domain, set ourself into the 873 * gtt write domain and manually flush cachelines (as required). 874 * This optimizes for the case when the gpu will use the data 875 * right away and we therefore have to clflush anyway. 876 */ 877 if (!obj->cache_dirty) { 878 *needs_clflush |= CLFLUSH_AFTER; 879 880 /* 881 * Same trick applies to invalidate partially written 882 * cachelines read before writing. 
883 */ 884 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 885 *needs_clflush |= CLFLUSH_BEFORE; 886 } 887 888 out: 889 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 890 obj->mm.dirty = true; 891 /* return with the pages pinned */ 892 return 0; 893 894 err_unpin: 895 i915_gem_object_unpin_pages(obj); 896 return ret; 897 } 898 899 static void 900 shmem_clflush_swizzled_range(char *addr, unsigned long length, 901 bool swizzled) 902 { 903 if (unlikely(swizzled)) { 904 unsigned long start = (unsigned long) addr; 905 unsigned long end = (unsigned long) addr + length; 906 907 /* For swizzling simply ensure that we always flush both 908 * channels. Lame, but simple and it works. Swizzled 909 * pwrite/pread is far from a hotpath - current userspace 910 * doesn't use it at all. */ 911 start = round_down(start, 128); 912 end = round_up(end, 128); 913 914 drm_clflush_virt_range((void *)start, end - start); 915 } else { 916 drm_clflush_virt_range(addr, length); 917 } 918 919 } 920 921 /* Only difference to the fast-path function is that this can handle bit17 922 * and uses non-atomic copy and kmap functions. */ 923 static int 924 shmem_pread_slow(struct page *page, int offset, int length, 925 char __user *user_data, 926 bool page_do_bit17_swizzling, bool needs_clflush) 927 { 928 char *vaddr; 929 int ret; 930 931 vaddr = kmap(page); 932 if (needs_clflush) 933 shmem_clflush_swizzled_range(vaddr + offset, length, 934 page_do_bit17_swizzling); 935 936 if (page_do_bit17_swizzling) 937 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 938 else 939 ret = __copy_to_user(user_data, vaddr + offset, length); 940 kunmap(page); 941 942 return ret ? - EFAULT : 0; 943 } 944 945 static int 946 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 947 bool page_do_bit17_swizzling, bool needs_clflush) 948 { 949 int ret; 950 951 ret = -ENODEV; 952 if (!page_do_bit17_swizzling) { 953 char *vaddr = kmap_atomic(page); 954 955 if (needs_clflush) 956 drm_clflush_virt_range(vaddr + offset, length); 957 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 958 kunmap_atomic(vaddr); 959 } 960 if (ret == 0) 961 return 0; 962 963 return shmem_pread_slow(page, offset, length, user_data, 964 page_do_bit17_swizzling, needs_clflush); 965 } 966 967 static int 968 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 969 struct drm_i915_gem_pread *args) 970 { 971 char __user *user_data; 972 u64 remain; 973 unsigned int obj_do_bit17_swizzling; 974 unsigned int needs_clflush; 975 unsigned int idx, offset; 976 int ret; 977 978 obj_do_bit17_swizzling = 0; 979 if (i915_gem_object_needs_bit17_swizzle(obj)) 980 obj_do_bit17_swizzling = BIT(17); 981 982 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 983 if (ret) 984 return ret; 985 986 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 987 mutex_unlock(&obj->base.dev->struct_mutex); 988 if (ret) 989 return ret; 990 991 remain = args->size; 992 user_data = u64_to_user_ptr(args->data_ptr); 993 offset = offset_in_page(args->offset); 994 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 995 struct page *page = i915_gem_object_get_page(obj, idx); 996 int length; 997 998 length = remain; 999 if (offset + length > PAGE_SIZE) 1000 length = PAGE_SIZE - offset; 1001 1002 ret = shmem_pread(page, offset, length, user_data, 1003 page_to_phys(page) & obj_do_bit17_swizzling, 1004 needs_clflush); 1005 if (ret) 1006 break; 1007 1008 remain -= length; 1009 user_data += length; 1010 offset = 0; 1011 } 1012 1013 
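	/* Drop the page pin taken by i915_gem_obj_prepare_shmem_read() above. */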
i915_gem_obj_finish_shmem_access(obj); 1014 return ret; 1015 } 1016 1017 static inline bool 1018 gtt_user_read(struct io_mapping *mapping, 1019 loff_t base, int offset, 1020 char __user *user_data, int length) 1021 { 1022 void __iomem *vaddr; 1023 unsigned long unwritten; 1024 1025 /* We can use the cpu mem copy function because this is X86. */ 1026 vaddr = io_mapping_map_atomic_wc(mapping, base); 1027 unwritten = __copy_to_user_inatomic(user_data, 1028 (void __force *)vaddr + offset, 1029 length); 1030 io_mapping_unmap_atomic(vaddr); 1031 if (unwritten) { 1032 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1033 unwritten = copy_to_user(user_data, 1034 (void __force *)vaddr + offset, 1035 length); 1036 io_mapping_unmap(vaddr); 1037 } 1038 return unwritten; 1039 } 1040 1041 static int 1042 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1043 const struct drm_i915_gem_pread *args) 1044 { 1045 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1046 struct i915_ggtt *ggtt = &i915->ggtt; 1047 struct drm_mm_node node; 1048 struct i915_vma *vma; 1049 void __user *user_data; 1050 u64 remain, offset; 1051 int ret; 1052 1053 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1054 if (ret) 1055 return ret; 1056 1057 intel_runtime_pm_get(i915); 1058 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1059 PIN_MAPPABLE | 1060 PIN_NONFAULT | 1061 PIN_NONBLOCK); 1062 if (!IS_ERR(vma)) { 1063 node.start = i915_ggtt_offset(vma); 1064 node.allocated = false; 1065 ret = i915_vma_put_fence(vma); 1066 if (ret) { 1067 i915_vma_unpin(vma); 1068 vma = ERR_PTR(ret); 1069 } 1070 } 1071 if (IS_ERR(vma)) { 1072 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1073 if (ret) 1074 goto out_unlock; 1075 GEM_BUG_ON(!node.allocated); 1076 } 1077 1078 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1079 if (ret) 1080 goto out_unpin; 1081 1082 mutex_unlock(&i915->drm.struct_mutex); 1083 1084 user_data = u64_to_user_ptr(args->data_ptr); 1085 remain = args->size; 1086 offset = args->offset; 1087 1088 while (remain > 0) { 1089 /* Operation in this page 1090 * 1091 * page_base = page offset within aperture 1092 * page_offset = offset within page 1093 * page_length = bytes to copy for this page 1094 */ 1095 u32 page_base = node.start; 1096 unsigned page_offset = offset_in_page(offset); 1097 unsigned page_length = PAGE_SIZE - page_offset; 1098 page_length = remain < page_length ? remain : page_length; 1099 if (node.allocated) { 1100 wmb(); 1101 ggtt->base.insert_page(&ggtt->base, 1102 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1103 node.start, I915_CACHE_NONE, 0); 1104 wmb(); 1105 } else { 1106 page_base += offset & PAGE_MASK; 1107 } 1108 1109 if (gtt_user_read(&ggtt->mappable, page_base, page_offset, 1110 user_data, page_length)) { 1111 ret = -EFAULT; 1112 break; 1113 } 1114 1115 remain -= page_length; 1116 user_data += page_length; 1117 offset += page_length; 1118 } 1119 1120 mutex_lock(&i915->drm.struct_mutex); 1121 out_unpin: 1122 if (node.allocated) { 1123 wmb(); 1124 ggtt->base.clear_range(&ggtt->base, 1125 node.start, node.size); 1126 remove_mappable_node(&node); 1127 } else { 1128 i915_vma_unpin(vma); 1129 } 1130 out_unlock: 1131 intel_runtime_pm_put(i915); 1132 mutex_unlock(&i915->drm.struct_mutex); 1133 1134 return ret; 1135 } 1136 1137 /** 1138 * Reads data from the object referenced by handle. 1139 * @dev: drm device pointer 1140 * @data: ioctl data blob 1141 * @file: drm file pointer 1142 * 1143 * On error, the contents of *data are undefined. 
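 * The shmem path is tried first; objects without struct pages, or faults
 * on the user buffer, fall back to the GTT read path.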
1144 */ 1145 int 1146 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1147 struct drm_file *file) 1148 { 1149 struct drm_i915_gem_pread *args = data; 1150 struct drm_i915_gem_object *obj; 1151 int ret; 1152 1153 if (args->size == 0) 1154 return 0; 1155 1156 if (!access_ok(VERIFY_WRITE, 1157 u64_to_user_ptr(args->data_ptr), 1158 args->size)) 1159 return -EFAULT; 1160 1161 obj = i915_gem_object_lookup(file, args->handle); 1162 if (!obj) 1163 return -ENOENT; 1164 1165 /* Bounds check source. */ 1166 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1167 ret = -EINVAL; 1168 goto out; 1169 } 1170 1171 trace_i915_gem_object_pread(obj, args->offset, args->size); 1172 1173 ret = i915_gem_object_wait(obj, 1174 I915_WAIT_INTERRUPTIBLE, 1175 MAX_SCHEDULE_TIMEOUT, 1176 to_rps_client(file)); 1177 if (ret) 1178 goto out; 1179 1180 ret = i915_gem_object_pin_pages(obj); 1181 if (ret) 1182 goto out; 1183 1184 ret = i915_gem_shmem_pread(obj, args); 1185 if (ret == -EFAULT || ret == -ENODEV) 1186 ret = i915_gem_gtt_pread(obj, args); 1187 1188 i915_gem_object_unpin_pages(obj); 1189 out: 1190 i915_gem_object_put(obj); 1191 return ret; 1192 } 1193 1194 /* This is the fast write path which cannot handle 1195 * page faults in the source data 1196 */ 1197 1198 static inline bool 1199 ggtt_write(struct io_mapping *mapping, 1200 loff_t base, int offset, 1201 char __user *user_data, int length) 1202 { 1203 void __iomem *vaddr; 1204 unsigned long unwritten; 1205 1206 /* We can use the cpu mem copy function because this is X86. */ 1207 vaddr = io_mapping_map_atomic_wc(mapping, base); 1208 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1209 user_data, length); 1210 io_mapping_unmap_atomic(vaddr); 1211 if (unwritten) { 1212 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1213 unwritten = copy_from_user((void __force *)vaddr + offset, 1214 user_data, length); 1215 io_mapping_unmap(vaddr); 1216 } 1217 1218 return unwritten; 1219 } 1220 1221 /** 1222 * This is the fast pwrite path, where we copy the data directly from the 1223 * user into the GTT, uncached. 1224 * @obj: i915 GEM object 1225 * @args: pwrite arguments structure 1226 */ 1227 static int 1228 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1229 const struct drm_i915_gem_pwrite *args) 1230 { 1231 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1232 struct i915_ggtt *ggtt = &i915->ggtt; 1233 struct drm_mm_node node; 1234 struct i915_vma *vma; 1235 u64 remain, offset; 1236 void __user *user_data; 1237 int ret; 1238 1239 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1240 if (ret) 1241 return ret; 1242 1243 if (i915_gem_object_has_struct_page(obj)) { 1244 /* 1245 * Avoid waking the device up if we can fallback, as 1246 * waking/resuming is very slow (worst-case 10-100 ms 1247 * depending on PCI sleeps and our own resume time). 1248 * This easily dwarfs any performance advantage from 1249 * using the cache bypass of indirect GGTT access. 
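 * Returning -EFAULT here routes the write through the CPU-based
 * fallback paths in i915_gem_pwrite_ioctl() instead of waking the device.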
1250 */ 1251 if (!intel_runtime_pm_get_if_in_use(i915)) { 1252 ret = -EFAULT; 1253 goto out_unlock; 1254 } 1255 } else { 1256 /* No backing pages, no fallback, we must force GGTT access */ 1257 intel_runtime_pm_get(i915); 1258 } 1259 1260 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1261 PIN_MAPPABLE | 1262 PIN_NONFAULT | 1263 PIN_NONBLOCK); 1264 if (!IS_ERR(vma)) { 1265 node.start = i915_ggtt_offset(vma); 1266 node.allocated = false; 1267 ret = i915_vma_put_fence(vma); 1268 if (ret) { 1269 i915_vma_unpin(vma); 1270 vma = ERR_PTR(ret); 1271 } 1272 } 1273 if (IS_ERR(vma)) { 1274 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1275 if (ret) 1276 goto out_rpm; 1277 GEM_BUG_ON(!node.allocated); 1278 } 1279 1280 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1281 if (ret) 1282 goto out_unpin; 1283 1284 mutex_unlock(&i915->drm.struct_mutex); 1285 1286 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1287 1288 user_data = u64_to_user_ptr(args->data_ptr); 1289 offset = args->offset; 1290 remain = args->size; 1291 while (remain) { 1292 /* Operation in this page 1293 * 1294 * page_base = page offset within aperture 1295 * page_offset = offset within page 1296 * page_length = bytes to copy for this page 1297 */ 1298 u32 page_base = node.start; 1299 unsigned int page_offset = offset_in_page(offset); 1300 unsigned int page_length = PAGE_SIZE - page_offset; 1301 page_length = remain < page_length ? remain : page_length; 1302 if (node.allocated) { 1303 wmb(); /* flush the write before we modify the GGTT */ 1304 ggtt->base.insert_page(&ggtt->base, 1305 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1306 node.start, I915_CACHE_NONE, 0); 1307 wmb(); /* flush modifications to the GGTT (insert_page) */ 1308 } else { 1309 page_base += offset & PAGE_MASK; 1310 } 1311 /* If we get a fault while copying data, then (presumably) our 1312 * source page isn't available. Return the error and we'll 1313 * retry in the slow path. 1314 * If the object is non-shmem backed, we retry again with the 1315 * path that handles page fault. 1316 */ 1317 if (ggtt_write(&ggtt->mappable, page_base, page_offset, 1318 user_data, page_length)) { 1319 ret = -EFAULT; 1320 break; 1321 } 1322 1323 remain -= page_length; 1324 user_data += page_length; 1325 offset += page_length; 1326 } 1327 intel_fb_obj_flush(obj, ORIGIN_CPU); 1328 1329 mutex_lock(&i915->drm.struct_mutex); 1330 out_unpin: 1331 if (node.allocated) { 1332 wmb(); 1333 ggtt->base.clear_range(&ggtt->base, 1334 node.start, node.size); 1335 remove_mappable_node(&node); 1336 } else { 1337 i915_vma_unpin(vma); 1338 } 1339 out_rpm: 1340 intel_runtime_pm_put(i915); 1341 out_unlock: 1342 mutex_unlock(&i915->drm.struct_mutex); 1343 return ret; 1344 } 1345 1346 static int 1347 shmem_pwrite_slow(struct page *page, int offset, int length, 1348 char __user *user_data, 1349 bool page_do_bit17_swizzling, 1350 bool needs_clflush_before, 1351 bool needs_clflush_after) 1352 { 1353 char *vaddr; 1354 int ret; 1355 1356 vaddr = kmap(page); 1357 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1358 shmem_clflush_swizzled_range(vaddr + offset, length, 1359 page_do_bit17_swizzling); 1360 if (page_do_bit17_swizzling) 1361 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1362 length); 1363 else 1364 ret = __copy_from_user(vaddr + offset, user_data, length); 1365 if (needs_clflush_after) 1366 shmem_clflush_swizzled_range(vaddr + offset, length, 1367 page_do_bit17_swizzling); 1368 kunmap(page); 1369 1370 return ret ? 
-EFAULT : 0; 1371 } 1372 1373 /* Per-page copy function for the shmem pwrite fastpath. 1374 * Flushes invalid cachelines before writing to the target if 1375 * needs_clflush_before is set and flushes out any written cachelines after 1376 * writing if needs_clflush is set. 1377 */ 1378 static int 1379 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1380 bool page_do_bit17_swizzling, 1381 bool needs_clflush_before, 1382 bool needs_clflush_after) 1383 { 1384 int ret; 1385 1386 ret = -ENODEV; 1387 if (!page_do_bit17_swizzling) { 1388 char *vaddr = kmap_atomic(page); 1389 1390 if (needs_clflush_before) 1391 drm_clflush_virt_range(vaddr + offset, len); 1392 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1393 if (needs_clflush_after) 1394 drm_clflush_virt_range(vaddr + offset, len); 1395 1396 kunmap_atomic(vaddr); 1397 } 1398 if (ret == 0) 1399 return ret; 1400 1401 return shmem_pwrite_slow(page, offset, len, user_data, 1402 page_do_bit17_swizzling, 1403 needs_clflush_before, 1404 needs_clflush_after); 1405 } 1406 1407 static int 1408 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1409 const struct drm_i915_gem_pwrite *args) 1410 { 1411 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1412 void __user *user_data; 1413 u64 remain; 1414 unsigned int obj_do_bit17_swizzling; 1415 unsigned int partial_cacheline_write; 1416 unsigned int needs_clflush; 1417 unsigned int offset, idx; 1418 int ret; 1419 1420 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1421 if (ret) 1422 return ret; 1423 1424 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1425 mutex_unlock(&i915->drm.struct_mutex); 1426 if (ret) 1427 return ret; 1428 1429 obj_do_bit17_swizzling = 0; 1430 if (i915_gem_object_needs_bit17_swizzle(obj)) 1431 obj_do_bit17_swizzling = BIT(17); 1432 1433 /* If we don't overwrite a cacheline completely we need to be 1434 * careful to have up-to-date data by first clflushing. Don't 1435 * overcomplicate things and flush the entire patch. 1436 */ 1437 partial_cacheline_write = 0; 1438 if (needs_clflush & CLFLUSH_BEFORE) 1439 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1440 1441 user_data = u64_to_user_ptr(args->data_ptr); 1442 remain = args->size; 1443 offset = offset_in_page(args->offset); 1444 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1445 struct page *page = i915_gem_object_get_page(obj, idx); 1446 int length; 1447 1448 length = remain; 1449 if (offset + length > PAGE_SIZE) 1450 length = PAGE_SIZE - offset; 1451 1452 ret = shmem_pwrite(page, offset, length, user_data, 1453 page_to_phys(page) & obj_do_bit17_swizzling, 1454 (offset | length) & partial_cacheline_write, 1455 needs_clflush & CLFLUSH_AFTER); 1456 if (ret) 1457 break; 1458 1459 remain -= length; 1460 user_data += length; 1461 offset = 0; 1462 } 1463 1464 intel_fb_obj_flush(obj, ORIGIN_CPU); 1465 i915_gem_obj_finish_shmem_access(obj); 1466 return ret; 1467 } 1468 1469 /** 1470 * Writes data to the object referenced by handle. 1471 * @dev: drm device 1472 * @data: ioctl data blob 1473 * @file: drm file 1474 * 1475 * On error, the contents of the buffer that were to be modified are undefined. 
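 * Writes may take the uncached GTT fast path; if that is not possible
 * they fall back to the phys or shmem paths.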
1476 */ 1477 int 1478 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1479 struct drm_file *file) 1480 { 1481 struct drm_i915_gem_pwrite *args = data; 1482 struct drm_i915_gem_object *obj; 1483 int ret; 1484 1485 if (args->size == 0) 1486 return 0; 1487 1488 if (!access_ok(VERIFY_READ, 1489 u64_to_user_ptr(args->data_ptr), 1490 args->size)) 1491 return -EFAULT; 1492 1493 obj = i915_gem_object_lookup(file, args->handle); 1494 if (!obj) 1495 return -ENOENT; 1496 1497 /* Bounds check destination. */ 1498 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1499 ret = -EINVAL; 1500 goto err; 1501 } 1502 1503 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1504 1505 ret = -ENODEV; 1506 if (obj->ops->pwrite) 1507 ret = obj->ops->pwrite(obj, args); 1508 if (ret != -ENODEV) 1509 goto err; 1510 1511 ret = i915_gem_object_wait(obj, 1512 I915_WAIT_INTERRUPTIBLE | 1513 I915_WAIT_ALL, 1514 MAX_SCHEDULE_TIMEOUT, 1515 to_rps_client(file)); 1516 if (ret) 1517 goto err; 1518 1519 ret = i915_gem_object_pin_pages(obj); 1520 if (ret) 1521 goto err; 1522 1523 ret = -EFAULT; 1524 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1525 * it would end up going through the fenced access, and we'll get 1526 * different detiling behavior between reading and writing. 1527 * pread/pwrite currently are reading and writing from the CPU 1528 * perspective, requiring manual detiling by the client. 1529 */ 1530 if (!i915_gem_object_has_struct_page(obj) || 1531 cpu_write_needs_clflush(obj)) 1532 /* Note that the gtt paths might fail with non-page-backed user 1533 * pointers (e.g. gtt mappings when moving data between 1534 * textures). Fallback to the shmem path in that case. 1535 */ 1536 ret = i915_gem_gtt_pwrite_fast(obj, args); 1537 1538 if (ret == -EFAULT || ret == -ENOSPC) { 1539 if (obj->phys_handle) 1540 ret = i915_gem_phys_pwrite(obj, args, file); 1541 else 1542 ret = i915_gem_shmem_pwrite(obj, args); 1543 } 1544 1545 i915_gem_object_unpin_pages(obj); 1546 err: 1547 i915_gem_object_put(obj); 1548 return ret; 1549 } 1550 1551 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1552 { 1553 struct drm_i915_private *i915; 1554 struct list_head *list; 1555 struct i915_vma *vma; 1556 1557 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1558 1559 list_for_each_entry(vma, &obj->vma_list, obj_link) { 1560 if (!i915_vma_is_ggtt(vma)) 1561 break; 1562 1563 if (i915_vma_is_active(vma)) 1564 continue; 1565 1566 if (!drm_mm_node_allocated(&vma->node)) 1567 continue; 1568 1569 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1570 } 1571 1572 i915 = to_i915(obj->base.dev); 1573 spin_lock(&i915->mm.obj_lock); 1574 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1575 list_move_tail(&obj->mm.link, list); 1576 spin_unlock(&i915->mm.obj_lock); 1577 } 1578 1579 /** 1580 * Called when user space prepares to use an object with the CPU, either 1581 * through the mmap ioctl's mapping or a GTT mapping. 1582 * @dev: drm device 1583 * @data: ioctl data blob 1584 * @file: drm file 1585 */ 1586 int 1587 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1588 struct drm_file *file) 1589 { 1590 struct drm_i915_gem_set_domain *args = data; 1591 struct drm_i915_gem_object *obj; 1592 uint32_t read_domains = args->read_domains; 1593 uint32_t write_domain = args->write_domain; 1594 int err; 1595 1596 /* Only handle setting domains to types used by the CPU. 
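 * GPU domains are rejected outright; this ioctl only moves objects
 * between the CPU, GTT and WC domains.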
*/ 1597 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1598 return -EINVAL; 1599 1600 /* Having something in the write domain implies it's in the read 1601 * domain, and only that read domain. Enforce that in the request. 1602 */ 1603 if (write_domain != 0 && read_domains != write_domain) 1604 return -EINVAL; 1605 1606 obj = i915_gem_object_lookup(file, args->handle); 1607 if (!obj) 1608 return -ENOENT; 1609 1610 /* Try to flush the object off the GPU without holding the lock. 1611 * We will repeat the flush holding the lock in the normal manner 1612 * to catch cases where we are gazumped. 1613 */ 1614 err = i915_gem_object_wait(obj, 1615 I915_WAIT_INTERRUPTIBLE | 1616 (write_domain ? I915_WAIT_ALL : 0), 1617 MAX_SCHEDULE_TIMEOUT, 1618 to_rps_client(file)); 1619 if (err) 1620 goto out; 1621 1622 /* 1623 * Proxy objects do not control access to the backing storage, ergo 1624 * they cannot be used as a means to manipulate the cache domain 1625 * tracking for that backing storage. The proxy object is always 1626 * considered to be outside of any cache domain. 1627 */ 1628 if (i915_gem_object_is_proxy(obj)) { 1629 err = -ENXIO; 1630 goto out; 1631 } 1632 1633 /* 1634 * Flush and acquire obj->pages so that we are coherent through 1635 * direct access in memory with previous cached writes through 1636 * shmemfs and that our cache domain tracking remains valid. 1637 * For example, if the obj->filp was moved to swap without us 1638 * being notified and releasing the pages, we would mistakenly 1639 * continue to assume that the obj remained out of the CPU cached 1640 * domain. 1641 */ 1642 err = i915_gem_object_pin_pages(obj); 1643 if (err) 1644 goto out; 1645 1646 err = i915_mutex_lock_interruptible(dev); 1647 if (err) 1648 goto out_unpin; 1649 1650 if (read_domains & I915_GEM_DOMAIN_WC) 1651 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1652 else if (read_domains & I915_GEM_DOMAIN_GTT) 1653 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1654 else 1655 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1656 1657 /* And bump the LRU for this access */ 1658 i915_gem_object_bump_inactive_ggtt(obj); 1659 1660 mutex_unlock(&dev->struct_mutex); 1661 1662 if (write_domain != 0) 1663 intel_fb_obj_invalidate(obj, 1664 fb_write_origin(obj, write_domain)); 1665 1666 out_unpin: 1667 i915_gem_object_unpin_pages(obj); 1668 out: 1669 i915_gem_object_put(obj); 1670 return err; 1671 } 1672 1673 /** 1674 * Called when user space has done writes to this buffer 1675 * @dev: drm device 1676 * @data: ioctl data blob 1677 * @file: drm file 1678 */ 1679 int 1680 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1681 struct drm_file *file) 1682 { 1683 struct drm_i915_gem_sw_finish *args = data; 1684 struct drm_i915_gem_object *obj; 1685 1686 obj = i915_gem_object_lookup(file, args->handle); 1687 if (!obj) 1688 return -ENOENT; 1689 1690 /* 1691 * Proxy objects are barred from CPU access, so there is no 1692 * need to ban sw_finish as it is a nop. 1693 */ 1694 1695 /* Pinned buffers may be scanout, so flush the cache */ 1696 i915_gem_object_flush_if_display(obj); 1697 i915_gem_object_put(obj); 1698 1699 return 0; 1700 } 1701 1702 /** 1703 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1704 * it is mapped to. 1705 * @dev: drm device 1706 * @data: ioctl data blob 1707 * @file: drm file 1708 * 1709 * While the mapping holds a reference on the contents of the object, it doesn't 1710 * imply a ref on the object itself. 
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
int i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
	struct i915_vma *vma;
	pgoff_t page_offset;
	unsigned int flags;
	int ret;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
1898 * Upon acquiring the lock, we will perform our sanity checks and then 1899 * repeat the flush holding the lock in the normal manner to catch cases 1900 * where we are gazumped. 1901 */ 1902 ret = i915_gem_object_wait(obj, 1903 I915_WAIT_INTERRUPTIBLE, 1904 MAX_SCHEDULE_TIMEOUT, 1905 NULL); 1906 if (ret) 1907 goto err; 1908 1909 ret = i915_gem_object_pin_pages(obj); 1910 if (ret) 1911 goto err; 1912 1913 intel_runtime_pm_get(dev_priv); 1914 1915 ret = i915_mutex_lock_interruptible(dev); 1916 if (ret) 1917 goto err_rpm; 1918 1919 /* Access to snoopable pages through the GTT is incoherent. */ 1920 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1921 ret = -EFAULT; 1922 goto err_unlock; 1923 } 1924 1925 /* If the object is smaller than a couple of partial vma, it is 1926 * not worth only creating a single partial vma - we may as well 1927 * clear enough space for the full object. 1928 */ 1929 flags = PIN_MAPPABLE; 1930 if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) 1931 flags |= PIN_NONBLOCK | PIN_NONFAULT; 1932 1933 /* Now pin it into the GTT as needed */ 1934 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); 1935 if (IS_ERR(vma)) { 1936 /* Use a partial view if it is bigger than available space */ 1937 struct i915_ggtt_view view = 1938 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 1939 1940 /* Userspace is now writing through an untracked VMA, abandon 1941 * all hope that the hardware is able to track future writes. 1942 */ 1943 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 1944 1945 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 1946 } 1947 if (IS_ERR(vma)) { 1948 ret = PTR_ERR(vma); 1949 goto err_unlock; 1950 } 1951 1952 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1953 if (ret) 1954 goto err_unpin; 1955 1956 ret = i915_vma_pin_fence(vma); 1957 if (ret) 1958 goto err_unpin; 1959 1960 /* Finally, remap it using the new GTT offset */ 1961 ret = remap_io_mapping(area, 1962 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 1963 (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, 1964 min_t(u64, vma->size, area->vm_end - area->vm_start), 1965 &ggtt->mappable); 1966 if (ret) 1967 goto err_fence; 1968 1969 /* Mark as being mmapped into userspace for later revocation */ 1970 assert_rpm_wakelock_held(dev_priv); 1971 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1972 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1973 GEM_BUG_ON(!obj->userfault_count); 1974 1975 err_fence: 1976 i915_vma_unpin_fence(vma); 1977 err_unpin: 1978 __i915_vma_unpin(vma); 1979 err_unlock: 1980 mutex_unlock(&dev->struct_mutex); 1981 err_rpm: 1982 intel_runtime_pm_put(dev_priv); 1983 i915_gem_object_unpin_pages(obj); 1984 err: 1985 switch (ret) { 1986 case -EIO: 1987 /* 1988 * We eat errors when the gpu is terminally wedged to avoid 1989 * userspace unduly crashing (gl has no provisions for mmaps to 1990 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1991 * and so needs to be reported. 1992 */ 1993 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1994 ret = VM_FAULT_SIGBUS; 1995 break; 1996 } 1997 case -EAGAIN: 1998 /* 1999 * EAGAIN means the gpu is hung and we'll wait for the error 2000 * handler to reset everything when re-faulting in 2001 * i915_mutex_lock_interruptible. 2002 */ 2003 case 0: 2004 case -ERESTARTSYS: 2005 case -EINTR: 2006 case -EBUSY: 2007 /* 2008 * EBUSY is ok: this just means that another thread 2009 * already did the job. 
2010 */ 2011 ret = VM_FAULT_NOPAGE; 2012 break; 2013 case -ENOMEM: 2014 ret = VM_FAULT_OOM; 2015 break; 2016 case -ENOSPC: 2017 case -EFAULT: 2018 ret = VM_FAULT_SIGBUS; 2019 break; 2020 default: 2021 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2022 ret = VM_FAULT_SIGBUS; 2023 break; 2024 } 2025 return ret; 2026 } 2027 2028 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2029 { 2030 struct i915_vma *vma; 2031 2032 GEM_BUG_ON(!obj->userfault_count); 2033 2034 obj->userfault_count = 0; 2035 list_del(&obj->userfault_link); 2036 drm_vma_node_unmap(&obj->base.vma_node, 2037 obj->base.dev->anon_inode->i_mapping); 2038 2039 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2040 if (!i915_vma_is_ggtt(vma)) 2041 break; 2042 2043 i915_vma_unset_userfault(vma); 2044 } 2045 } 2046 2047 /** 2048 * i915_gem_release_mmap - remove physical page mappings 2049 * @obj: obj in question 2050 * 2051 * Preserve the reservation of the mmapping with the DRM core code, but 2052 * relinquish ownership of the pages back to the system. 2053 * 2054 * It is vital that we remove the page mapping if we have mapped a tiled 2055 * object through the GTT and then lose the fence register due to 2056 * resource pressure. Similarly if the object has been moved out of the 2057 * aperture, than pages mapped into userspace must be revoked. Removing the 2058 * mapping will then trigger a page fault on the next user access, allowing 2059 * fixup by i915_gem_fault(). 2060 */ 2061 void 2062 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2063 { 2064 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2065 2066 /* Serialisation between user GTT access and our code depends upon 2067 * revoking the CPU's PTE whilst the mutex is held. The next user 2068 * pagefault then has to wait until we release the mutex. 2069 * 2070 * Note that RPM complicates somewhat by adding an additional 2071 * requirement that operations to the GGTT be made holding the RPM 2072 * wakeref. 2073 */ 2074 lockdep_assert_held(&i915->drm.struct_mutex); 2075 intel_runtime_pm_get(i915); 2076 2077 if (!obj->userfault_count) 2078 goto out; 2079 2080 __i915_gem_object_release_mmap(obj); 2081 2082 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2083 * memory transactions from userspace before we return. The TLB 2084 * flushing implied above by changing the PTE above *should* be 2085 * sufficient, an extra barrier here just provides us with a bit 2086 * of paranoid documentation about our requirement to serialise 2087 * memory writes before touching registers / GSM. 2088 */ 2089 wmb(); 2090 2091 out: 2092 intel_runtime_pm_put(i915); 2093 } 2094 2095 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2096 { 2097 struct drm_i915_gem_object *obj, *on; 2098 int i; 2099 2100 /* 2101 * Only called during RPM suspend. All users of the userfault_list 2102 * must be holding an RPM wakeref to ensure that this can not 2103 * run concurrently with themselves (and use the struct_mutex for 2104 * protection between themselves). 2105 */ 2106 2107 list_for_each_entry_safe(obj, on, 2108 &dev_priv->mm.userfault_list, userfault_link) 2109 __i915_gem_object_release_mmap(obj); 2110 2111 /* The fence will be lost when the device powers down. If any were 2112 * in use by hardware (i.e. they are pinned), we should not be powering 2113 * down! All other fences will be reacquired by the user upon waking. 
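	 *
	 * (Aside: setting reg->dirty below should guarantee that each fence
	 * register is rewritten before it is next used after resume.)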
2114 */ 2115 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2116 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2117 2118 /* Ideally we want to assert that the fence register is not 2119 * live at this point (i.e. that no piece of code will be 2120 * trying to write through fence + GTT, as that both violates 2121 * our tracking of activity and associated locking/barriers, 2122 * but also is illegal given that the hw is powered down). 2123 * 2124 * Previously we used reg->pin_count as a "liveness" indicator. 2125 * That is not sufficient, and we need a more fine-grained 2126 * tool if we want to have a sanity check here. 2127 */ 2128 2129 if (!reg->vma) 2130 continue; 2131 2132 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2133 reg->dirty = true; 2134 } 2135 } 2136 2137 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2138 { 2139 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2140 int err; 2141 2142 err = drm_gem_create_mmap_offset(&obj->base); 2143 if (likely(!err)) 2144 return 0; 2145 2146 /* Attempt to reap some mmap space from dead objects */ 2147 do { 2148 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2149 if (err) 2150 break; 2151 2152 i915_gem_drain_freed_objects(dev_priv); 2153 err = drm_gem_create_mmap_offset(&obj->base); 2154 if (!err) 2155 break; 2156 2157 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2158 2159 return err; 2160 } 2161 2162 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2163 { 2164 drm_gem_free_mmap_offset(&obj->base); 2165 } 2166 2167 int 2168 i915_gem_mmap_gtt(struct drm_file *file, 2169 struct drm_device *dev, 2170 uint32_t handle, 2171 uint64_t *offset) 2172 { 2173 struct drm_i915_gem_object *obj; 2174 int ret; 2175 2176 obj = i915_gem_object_lookup(file, handle); 2177 if (!obj) 2178 return -ENOENT; 2179 2180 ret = i915_gem_object_create_mmap_offset(obj); 2181 if (ret == 0) 2182 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2183 2184 i915_gem_object_put(obj); 2185 return ret; 2186 } 2187 2188 /** 2189 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2190 * @dev: DRM device 2191 * @data: GTT mapping ioctl data 2192 * @file: GEM object info 2193 * 2194 * Simply returns the fake offset to userspace so it can mmap it. 2195 * The mmap call will end up in drm_gem_mmap(), which will set things 2196 * up so we can get faults in the handler above. 2197 * 2198 * The fault handler will take care of binding the object into the GTT 2199 * (since it may have been evicted to make room for something), allocating 2200 * a fence register, and mapping the appropriate aperture address into 2201 * userspace. 2202 */ 2203 int 2204 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2205 struct drm_file *file) 2206 { 2207 struct drm_i915_gem_mmap_gtt *args = data; 2208 2209 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2210 } 2211 2212 /* Immediately discard the backing storage */ 2213 static void 2214 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2215 { 2216 i915_gem_object_free_mmap_offset(obj); 2217 2218 if (obj->base.filp == NULL) 2219 return; 2220 2221 /* Our goal here is to return as much of the memory as 2222 * is possible back to the system as we are called from OOM. 2223 * To do this we must instruct the shmfs to drop all of its 2224 * backing pages, *now*. 
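	 *
	 * (Aside: after truncation the object is marked __I915_MADV_PURGED and
	 * obj->mm.pages is left as ERR_PTR(-EFAULT), so later attempts to use
	 * the backing store should fail instead of silently reallocating it.)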
 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);
}

/* Try to discard unwanted pages */
void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping;

	lockdep_assert_held(&obj->mm.lock);
	GEM_BUG_ON(i915_gem_object_has_pages(obj));

	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through */
	case __I915_MADV_PURGED:
		return;
	}

	if (obj->base.filp == NULL)
		return;

	mapping = obj->base.filp->f_mapping;
	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
			      struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
	rcu_read_unlock();
}

void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
				 enum i915_mm_subclass subclass)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *pages;

	if (i915_gem_object_has_pinned_pages(obj))
		return;

	GEM_BUG_ON(obj->bind_count);
	if (!i915_gem_object_has_pages(obj))
		return;

	/* May be called by shrinker from within get_pages() (on another bo) */
	mutex_lock_nested(&obj->mm.lock, subclass);
	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
		goto unlock;

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early.
*/ 2314 pages = fetch_and_zero(&obj->mm.pages); 2315 GEM_BUG_ON(!pages); 2316 2317 spin_lock(&i915->mm.obj_lock); 2318 list_del(&obj->mm.link); 2319 spin_unlock(&i915->mm.obj_lock); 2320 2321 if (obj->mm.mapping) { 2322 void *ptr; 2323 2324 ptr = page_mask_bits(obj->mm.mapping); 2325 if (is_vmalloc_addr(ptr)) 2326 vunmap(ptr); 2327 else 2328 kunmap(kmap_to_page(ptr)); 2329 2330 obj->mm.mapping = NULL; 2331 } 2332 2333 __i915_gem_object_reset_page_iter(obj); 2334 2335 if (!IS_ERR(pages)) 2336 obj->ops->put_pages(obj, pages); 2337 2338 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2339 2340 unlock: 2341 mutex_unlock(&obj->mm.lock); 2342 } 2343 2344 static bool i915_sg_trim(struct sg_table *orig_st) 2345 { 2346 struct sg_table new_st; 2347 struct scatterlist *sg, *new_sg; 2348 unsigned int i; 2349 2350 if (orig_st->nents == orig_st->orig_nents) 2351 return false; 2352 2353 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2354 return false; 2355 2356 new_sg = new_st.sgl; 2357 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2358 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2359 /* called before being DMA mapped, no need to copy sg->dma_* */ 2360 new_sg = sg_next(new_sg); 2361 } 2362 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2363 2364 sg_free_table(orig_st); 2365 2366 *orig_st = new_st; 2367 return true; 2368 } 2369 2370 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2371 { 2372 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2373 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2374 unsigned long i; 2375 struct address_space *mapping; 2376 struct sg_table *st; 2377 struct scatterlist *sg; 2378 struct sgt_iter sgt_iter; 2379 struct page *page; 2380 unsigned long last_pfn = 0; /* suppress gcc warning */ 2381 unsigned int max_segment = i915_sg_segment_size(); 2382 unsigned int sg_page_sizes; 2383 gfp_t noreclaim; 2384 int ret; 2385 2386 /* Assert that the object is not currently in any GPU domain. As it 2387 * wasn't in the GTT, there shouldn't be any way it could have been in 2388 * a GPU cache 2389 */ 2390 GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2391 GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2392 2393 st = kmalloc(sizeof(*st), GFP_KERNEL); 2394 if (st == NULL) 2395 return -ENOMEM; 2396 2397 rebuild_st: 2398 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2399 kfree(st); 2400 return -ENOMEM; 2401 } 2402 2403 /* Get the list of pages out of our struct file. They'll be pinned 2404 * at this point until we release them. 2405 * 2406 * Fail silently without starting the shrinker 2407 */ 2408 mapping = obj->base.filp->f_mapping; 2409 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2410 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2411 2412 sg = st->sgl; 2413 st->nents = 0; 2414 sg_page_sizes = 0; 2415 for (i = 0; i < page_count; i++) { 2416 const unsigned int shrink[] = { 2417 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2418 0, 2419 }, *s = shrink; 2420 gfp_t gfp = noreclaim; 2421 2422 do { 2423 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2424 if (likely(!IS_ERR(page))) 2425 break; 2426 2427 if (!*s) { 2428 ret = PTR_ERR(page); 2429 goto err_sg; 2430 } 2431 2432 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2433 cond_resched(); 2434 2435 /* We've tried hard to allocate the memory by reaping 2436 * our own buffer, now let the real VM do its job and 2437 * go down in flames if truly OOM. 
2438 * 2439 * However, since graphics tend to be disposable, 2440 * defer the oom here by reporting the ENOMEM back 2441 * to userspace. 2442 */ 2443 if (!*s) { 2444 /* reclaim and warn, but no oom */ 2445 gfp = mapping_gfp_mask(mapping); 2446 2447 /* Our bo are always dirty and so we require 2448 * kswapd to reclaim our pages (direct reclaim 2449 * does not effectively begin pageout of our 2450 * buffers on its own). However, direct reclaim 2451 * only waits for kswapd when under allocation 2452 * congestion. So as a result __GFP_RECLAIM is 2453 * unreliable and fails to actually reclaim our 2454 * dirty pages -- unless you try over and over 2455 * again with !__GFP_NORETRY. However, we still 2456 * want to fail this allocation rather than 2457 * trigger the out-of-memory killer and for 2458 * this we want __GFP_RETRY_MAYFAIL. 2459 */ 2460 gfp |= __GFP_RETRY_MAYFAIL; 2461 } 2462 } while (1); 2463 2464 if (!i || 2465 sg->length >= max_segment || 2466 page_to_pfn(page) != last_pfn + 1) { 2467 if (i) { 2468 sg_page_sizes |= sg->length; 2469 sg = sg_next(sg); 2470 } 2471 st->nents++; 2472 sg_set_page(sg, page, PAGE_SIZE, 0); 2473 } else { 2474 sg->length += PAGE_SIZE; 2475 } 2476 last_pfn = page_to_pfn(page); 2477 2478 /* Check that the i965g/gm workaround works. */ 2479 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2480 } 2481 if (sg) { /* loop terminated early; short sg table */ 2482 sg_page_sizes |= sg->length; 2483 sg_mark_end(sg); 2484 } 2485 2486 /* Trim unused sg entries to avoid wasting memory. */ 2487 i915_sg_trim(st); 2488 2489 ret = i915_gem_gtt_prepare_pages(obj, st); 2490 if (ret) { 2491 /* DMA remapping failed? One possible cause is that 2492 * it could not reserve enough large entries, asking 2493 * for PAGE_SIZE chunks instead may be helpful. 2494 */ 2495 if (max_segment > PAGE_SIZE) { 2496 for_each_sgt_page(page, sgt_iter, st) 2497 put_page(page); 2498 sg_free_table(st); 2499 2500 max_segment = PAGE_SIZE; 2501 goto rebuild_st; 2502 } else { 2503 dev_warn(&dev_priv->drm.pdev->dev, 2504 "Failed to DMA remap %lu pages\n", 2505 page_count); 2506 goto err_pages; 2507 } 2508 } 2509 2510 if (i915_gem_object_needs_bit17_swizzle(obj)) 2511 i915_gem_object_do_bit_17_swizzle(obj, st); 2512 2513 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2514 2515 return 0; 2516 2517 err_sg: 2518 sg_mark_end(sg); 2519 err_pages: 2520 for_each_sgt_page(page, sgt_iter, st) 2521 put_page(page); 2522 sg_free_table(st); 2523 kfree(st); 2524 2525 /* shmemfs first checks if there is enough memory to allocate the page 2526 * and reports ENOSPC should there be insufficient, along with the usual 2527 * ENOMEM for a genuine allocation failure. 2528 * 2529 * We use ENOSPC in our driver to mean that we have run out of aperture 2530 * space and so want to translate the error from shmemfs back to our 2531 * usual understanding of ENOMEM. 
2532 */ 2533 if (ret == -ENOSPC) 2534 ret = -ENOMEM; 2535 2536 return ret; 2537 } 2538 2539 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2540 struct sg_table *pages, 2541 unsigned int sg_page_sizes) 2542 { 2543 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2544 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2545 int i; 2546 2547 lockdep_assert_held(&obj->mm.lock); 2548 2549 obj->mm.get_page.sg_pos = pages->sgl; 2550 obj->mm.get_page.sg_idx = 0; 2551 2552 obj->mm.pages = pages; 2553 2554 if (i915_gem_object_is_tiled(obj) && 2555 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2556 GEM_BUG_ON(obj->mm.quirked); 2557 __i915_gem_object_pin_pages(obj); 2558 obj->mm.quirked = true; 2559 } 2560 2561 GEM_BUG_ON(!sg_page_sizes); 2562 obj->mm.page_sizes.phys = sg_page_sizes; 2563 2564 /* 2565 * Calculate the supported page-sizes which fit into the given 2566 * sg_page_sizes. This will give us the page-sizes which we may be able 2567 * to use opportunistically when later inserting into the GTT. For 2568 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2569 * 64K or 4K pages, although in practice this will depend on a number of 2570 * other factors. 2571 */ 2572 obj->mm.page_sizes.sg = 0; 2573 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2574 if (obj->mm.page_sizes.phys & ~0u << i) 2575 obj->mm.page_sizes.sg |= BIT(i); 2576 } 2577 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2578 2579 spin_lock(&i915->mm.obj_lock); 2580 list_add(&obj->mm.link, &i915->mm.unbound_list); 2581 spin_unlock(&i915->mm.obj_lock); 2582 } 2583 2584 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2585 { 2586 int err; 2587 2588 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2589 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2590 return -EFAULT; 2591 } 2592 2593 err = obj->ops->get_pages(obj); 2594 GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); 2595 2596 return err; 2597 } 2598 2599 /* Ensure that the associated pages are gathered from the backing storage 2600 * and pinned into our object. i915_gem_object_pin_pages() may be called 2601 * multiple times before they are released by a single call to 2602 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2603 * either as a result of memory pressure (reaping pages under the shrinker) 2604 * or as the object is itself released. 
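 *
 * A typical pairing from a caller might look like this (purely
 * illustrative, error handling trimmed):
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access obj->mm.pages ...
 *
 *	i915_gem_object_unpin_pages(obj);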
2605 */ 2606 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2607 { 2608 int err; 2609 2610 err = mutex_lock_interruptible(&obj->mm.lock); 2611 if (err) 2612 return err; 2613 2614 if (unlikely(!i915_gem_object_has_pages(obj))) { 2615 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2616 2617 err = ____i915_gem_object_get_pages(obj); 2618 if (err) 2619 goto unlock; 2620 2621 smp_mb__before_atomic(); 2622 } 2623 atomic_inc(&obj->mm.pages_pin_count); 2624 2625 unlock: 2626 mutex_unlock(&obj->mm.lock); 2627 return err; 2628 } 2629 2630 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2631 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2632 enum i915_map_type type) 2633 { 2634 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2635 struct sg_table *sgt = obj->mm.pages; 2636 struct sgt_iter sgt_iter; 2637 struct page *page; 2638 struct page *stack_pages[32]; 2639 struct page **pages = stack_pages; 2640 unsigned long i = 0; 2641 pgprot_t pgprot; 2642 void *addr; 2643 2644 /* A single page can always be kmapped */ 2645 if (n_pages == 1 && type == I915_MAP_WB) 2646 return kmap(sg_page(sgt->sgl)); 2647 2648 if (n_pages > ARRAY_SIZE(stack_pages)) { 2649 /* Too big for stack -- allocate temporary array instead */ 2650 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2651 if (!pages) 2652 return NULL; 2653 } 2654 2655 for_each_sgt_page(page, sgt_iter, sgt) 2656 pages[i++] = page; 2657 2658 /* Check that we have the expected number of pages */ 2659 GEM_BUG_ON(i != n_pages); 2660 2661 switch (type) { 2662 default: 2663 MISSING_CASE(type); 2664 /* fallthrough to use PAGE_KERNEL anyway */ 2665 case I915_MAP_WB: 2666 pgprot = PAGE_KERNEL; 2667 break; 2668 case I915_MAP_WC: 2669 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2670 break; 2671 } 2672 addr = vmap(pages, n_pages, 0, pgprot); 2673 2674 if (pages != stack_pages) 2675 kvfree(pages); 2676 2677 return addr; 2678 } 2679 2680 /* get, pin, and map the pages of the object into kernel space */ 2681 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2682 enum i915_map_type type) 2683 { 2684 enum i915_map_type has_type; 2685 bool pinned; 2686 void *ptr; 2687 int ret; 2688 2689 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2690 return ERR_PTR(-ENXIO); 2691 2692 ret = mutex_lock_interruptible(&obj->mm.lock); 2693 if (ret) 2694 return ERR_PTR(ret); 2695 2696 pinned = !(type & I915_MAP_OVERRIDE); 2697 type &= ~I915_MAP_OVERRIDE; 2698 2699 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2700 if (unlikely(!i915_gem_object_has_pages(obj))) { 2701 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2702 2703 ret = ____i915_gem_object_get_pages(obj); 2704 if (ret) 2705 goto err_unlock; 2706 2707 smp_mb__before_atomic(); 2708 } 2709 atomic_inc(&obj->mm.pages_pin_count); 2710 pinned = false; 2711 } 2712 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2713 2714 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2715 if (ptr && has_type != type) { 2716 if (pinned) { 2717 ret = -EBUSY; 2718 goto err_unpin; 2719 } 2720 2721 if (is_vmalloc_addr(ptr)) 2722 vunmap(ptr); 2723 else 2724 kunmap(kmap_to_page(ptr)); 2725 2726 ptr = obj->mm.mapping = NULL; 2727 } 2728 2729 if (!ptr) { 2730 ptr = i915_gem_object_map(obj, type); 2731 if (!ptr) { 2732 ret = -ENOMEM; 2733 goto err_unpin; 2734 } 2735 2736 obj->mm.mapping = page_pack_bits(ptr, type); 2737 } 2738 2739 out_unlock: 2740 mutex_unlock(&obj->mm.lock); 2741 return ptr; 2742 2743 err_unpin: 2744 atomic_dec(&obj->mm.pages_pin_count); 2745 
err_unlock: 2746 ptr = ERR_PTR(ret); 2747 goto out_unlock; 2748 } 2749 2750 static int 2751 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2752 const struct drm_i915_gem_pwrite *arg) 2753 { 2754 struct address_space *mapping = obj->base.filp->f_mapping; 2755 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2756 u64 remain, offset; 2757 unsigned int pg; 2758 2759 /* Before we instantiate/pin the backing store for our use, we 2760 * can prepopulate the shmemfs filp efficiently using a write into 2761 * the pagecache. We avoid the penalty of instantiating all the 2762 * pages, important if the user is just writing to a few and never 2763 * uses the object on the GPU, and using a direct write into shmemfs 2764 * allows it to avoid the cost of retrieving a page (either swapin 2765 * or clearing-before-use) before it is overwritten. 2766 */ 2767 if (i915_gem_object_has_pages(obj)) 2768 return -ENODEV; 2769 2770 if (obj->mm.madv != I915_MADV_WILLNEED) 2771 return -EFAULT; 2772 2773 /* Before the pages are instantiated the object is treated as being 2774 * in the CPU domain. The pages will be clflushed as required before 2775 * use, and we can freely write into the pages directly. If userspace 2776 * races pwrite with any other operation; corruption will ensue - 2777 * that is userspace's prerogative! 2778 */ 2779 2780 remain = arg->size; 2781 offset = arg->offset; 2782 pg = offset_in_page(offset); 2783 2784 do { 2785 unsigned int len, unwritten; 2786 struct page *page; 2787 void *data, *vaddr; 2788 int err; 2789 2790 len = PAGE_SIZE - pg; 2791 if (len > remain) 2792 len = remain; 2793 2794 err = pagecache_write_begin(obj->base.filp, mapping, 2795 offset, len, 0, 2796 &page, &data); 2797 if (err < 0) 2798 return err; 2799 2800 vaddr = kmap(page); 2801 unwritten = copy_from_user(vaddr + pg, user_data, len); 2802 kunmap(page); 2803 2804 err = pagecache_write_end(obj->base.filp, mapping, 2805 offset, len, len - unwritten, 2806 page, data); 2807 if (err < 0) 2808 return err; 2809 2810 if (unwritten) 2811 return -EFAULT; 2812 2813 remain -= len; 2814 user_data += len; 2815 offset += len; 2816 pg = 0; 2817 } while (remain); 2818 2819 return 0; 2820 } 2821 2822 static bool ban_context(const struct i915_gem_context *ctx, 2823 unsigned int score) 2824 { 2825 return (i915_gem_context_is_bannable(ctx) && 2826 score >= CONTEXT_SCORE_BAN_THRESHOLD); 2827 } 2828 2829 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2830 { 2831 unsigned int score; 2832 bool banned; 2833 2834 atomic_inc(&ctx->guilty_count); 2835 2836 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 2837 banned = ban_context(ctx, score); 2838 DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", 2839 ctx->name, score, yesno(banned)); 2840 if (!banned) 2841 return; 2842 2843 i915_gem_context_set_banned(ctx); 2844 if (!IS_ERR_OR_NULL(ctx->file_priv)) { 2845 atomic_inc(&ctx->file_priv->context_bans); 2846 DRM_DEBUG_DRIVER("client %s has had %d context banned\n", 2847 ctx->name, atomic_read(&ctx->file_priv->context_bans)); 2848 } 2849 } 2850 2851 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2852 { 2853 atomic_inc(&ctx->active_count); 2854 } 2855 2856 struct drm_i915_gem_request * 2857 i915_gem_find_active_request(struct intel_engine_cs *engine) 2858 { 2859 struct drm_i915_gem_request *request, *active = NULL; 2860 unsigned long flags; 2861 2862 /* We are called by the error capture and reset at a random 2863 * point in time. 
In particular, note that neither is crucially 2864 * ordered with an interrupt. After a hang, the GPU is dead and we 2865 * assume that no more writes can happen (we waited long enough for 2866 * all writes that were in transaction to be flushed) - adding an 2867 * extra delay for a recent interrupt is pointless. Hence, we do 2868 * not need an engine->irq_seqno_barrier() before the seqno reads. 2869 */ 2870 spin_lock_irqsave(&engine->timeline->lock, flags); 2871 list_for_each_entry(request, &engine->timeline->requests, link) { 2872 if (__i915_gem_request_completed(request, 2873 request->global_seqno)) 2874 continue; 2875 2876 GEM_BUG_ON(request->engine != engine); 2877 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 2878 &request->fence.flags)); 2879 2880 active = request; 2881 break; 2882 } 2883 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2884 2885 return active; 2886 } 2887 2888 static bool engine_stalled(struct intel_engine_cs *engine) 2889 { 2890 if (!engine->hangcheck.stalled) 2891 return false; 2892 2893 /* Check for possible seqno movement after hang declaration */ 2894 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { 2895 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); 2896 return false; 2897 } 2898 2899 return true; 2900 } 2901 2902 /* 2903 * Ensure irq handler finishes, and not run again. 2904 * Also return the active request so that we only search for it once. 2905 */ 2906 struct drm_i915_gem_request * 2907 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 2908 { 2909 struct drm_i915_gem_request *request = NULL; 2910 2911 /* 2912 * During the reset sequence, we must prevent the engine from 2913 * entering RC6. As the context state is undefined until we restart 2914 * the engine, if it does enter RC6 during the reset, the state 2915 * written to the powercontext is undefined and so we may lose 2916 * GPU state upon resume, i.e. fail to restart after a reset. 2917 */ 2918 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 2919 2920 /* 2921 * Prevent the signaler thread from updating the request 2922 * state (by calling dma_fence_signal) as we are processing 2923 * the reset. The write from the GPU of the seqno is 2924 * asynchronous and the signaler thread may see a different 2925 * value to us and declare the request complete, even though 2926 * the reset routine have picked that request as the active 2927 * (incomplete) request. This conflict is not handled 2928 * gracefully! 2929 */ 2930 kthread_park(engine->breadcrumbs.signaler); 2931 2932 /* 2933 * Prevent request submission to the hardware until we have 2934 * completed the reset in i915_gem_reset_finish(). If a request 2935 * is completed by one engine, it may then queue a request 2936 * to a second via its execlists->tasklet *just* as we are 2937 * calling engine->init_hw() and also writing the ELSP. 2938 * Turning off the execlists->tasklet until the reset is over 2939 * prevents the race. 2940 */ 2941 tasklet_kill(&engine->execlists.tasklet); 2942 tasklet_disable(&engine->execlists.tasklet); 2943 2944 /* 2945 * We're using worker to queue preemption requests from the tasklet in 2946 * GuC submission mode. 2947 * Even though tasklet was disabled, we may still have a worker queued. 2948 * Let's make sure that all workers scheduled before disabling the 2949 * tasklet are completed before continuing with the reset. 
2950 */ 2951 if (engine->i915->guc.preempt_wq) 2952 flush_workqueue(engine->i915->guc.preempt_wq); 2953 2954 if (engine->irq_seqno_barrier) 2955 engine->irq_seqno_barrier(engine); 2956 2957 request = i915_gem_find_active_request(engine); 2958 if (request && request->fence.error == -EIO) 2959 request = ERR_PTR(-EIO); /* Previous reset failed! */ 2960 2961 return request; 2962 } 2963 2964 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 2965 { 2966 struct intel_engine_cs *engine; 2967 struct drm_i915_gem_request *request; 2968 enum intel_engine_id id; 2969 int err = 0; 2970 2971 for_each_engine(engine, dev_priv, id) { 2972 request = i915_gem_reset_prepare_engine(engine); 2973 if (IS_ERR(request)) { 2974 err = PTR_ERR(request); 2975 continue; 2976 } 2977 2978 engine->hangcheck.active_request = request; 2979 } 2980 2981 i915_gem_revoke_fences(dev_priv); 2982 2983 return err; 2984 } 2985 2986 static void skip_request(struct drm_i915_gem_request *request) 2987 { 2988 void *vaddr = request->ring->vaddr; 2989 u32 head; 2990 2991 /* As this request likely depends on state from the lost 2992 * context, clear out all the user operations leaving the 2993 * breadcrumb at the end (so we get the fence notifications). 2994 */ 2995 head = request->head; 2996 if (request->postfix < head) { 2997 memset(vaddr + head, 0, request->ring->size - head); 2998 head = 0; 2999 } 3000 memset(vaddr + head, 0, request->postfix - head); 3001 3002 dma_fence_set_error(&request->fence, -EIO); 3003 } 3004 3005 static void engine_skip_context(struct drm_i915_gem_request *request) 3006 { 3007 struct intel_engine_cs *engine = request->engine; 3008 struct i915_gem_context *hung_ctx = request->ctx; 3009 struct intel_timeline *timeline; 3010 unsigned long flags; 3011 3012 timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); 3013 3014 spin_lock_irqsave(&engine->timeline->lock, flags); 3015 spin_lock(&timeline->lock); 3016 3017 list_for_each_entry_continue(request, &engine->timeline->requests, link) 3018 if (request->ctx == hung_ctx) 3019 skip_request(request); 3020 3021 list_for_each_entry(request, &timeline->requests, link) 3022 skip_request(request); 3023 3024 spin_unlock(&timeline->lock); 3025 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3026 } 3027 3028 /* Returns the request if it was guilty of the hang */ 3029 static struct drm_i915_gem_request * 3030 i915_gem_reset_request(struct intel_engine_cs *engine, 3031 struct drm_i915_gem_request *request) 3032 { 3033 /* The guilty request will get skipped on a hung engine. 3034 * 3035 * Users of client default contexts do not rely on logical 3036 * state preserved between batches so it is safe to execute 3037 * queued requests following the hang. Non default contexts 3038 * rely on preserved state, so skipping a batch loses the 3039 * evolution of the state and it needs to be considered corrupted. 3040 * Executing more queued batches on top of corrupted state is 3041 * risky. But we take the risk by trying to advance through 3042 * the queued requests in order to make the client behaviour 3043 * more predictable around resets, by not throwing away random 3044 * amount of batches it has prepared for execution. Sophisticated 3045 * clients can use gem_reset_stats_ioctl and dma fence status 3046 * (exported via sync_file info ioctl on explicit fences) to observe 3047 * when it loses the context state and should rebuild accordingly. 
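	 *
	 * Purely illustrative userspace sketch (error handling omitted,
	 * recreate_context() is a hypothetical application helper):
	 *
	 *	struct drm_i915_reset_stats stats = { .ctx_id = ctx_id };
	 *
	 *	drmIoctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
	 *	if (stats.batch_active)
	 *		recreate_context();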
3048 * 3049 * The context ban, and ultimately the client ban, mechanism are safety 3050 * valves if client submission ends up resulting in nothing more than 3051 * subsequent hangs. 3052 */ 3053 3054 if (engine_stalled(engine)) { 3055 i915_gem_context_mark_guilty(request->ctx); 3056 skip_request(request); 3057 3058 /* If this context is now banned, skip all pending requests. */ 3059 if (i915_gem_context_is_banned(request->ctx)) 3060 engine_skip_context(request); 3061 } else { 3062 /* 3063 * Since this is not the hung engine, it may have advanced 3064 * since the hang declaration. Double check by refinding 3065 * the active request at the time of the reset. 3066 */ 3067 request = i915_gem_find_active_request(engine); 3068 if (request) { 3069 i915_gem_context_mark_innocent(request->ctx); 3070 dma_fence_set_error(&request->fence, -EAGAIN); 3071 3072 /* Rewind the engine to replay the incomplete rq */ 3073 spin_lock_irq(&engine->timeline->lock); 3074 request = list_prev_entry(request, link); 3075 if (&request->link == &engine->timeline->requests) 3076 request = NULL; 3077 spin_unlock_irq(&engine->timeline->lock); 3078 } 3079 } 3080 3081 return request; 3082 } 3083 3084 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3085 struct drm_i915_gem_request *request) 3086 { 3087 engine->irq_posted = 0; 3088 3089 if (request) 3090 request = i915_gem_reset_request(engine, request); 3091 3092 if (request) { 3093 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3094 engine->name, request->global_seqno); 3095 } 3096 3097 /* Setup the CS to resume from the breadcrumb of the hung request */ 3098 engine->reset_hw(engine, request); 3099 } 3100 3101 void i915_gem_reset(struct drm_i915_private *dev_priv) 3102 { 3103 struct intel_engine_cs *engine; 3104 enum intel_engine_id id; 3105 3106 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3107 3108 i915_gem_retire_requests(dev_priv); 3109 3110 for_each_engine(engine, dev_priv, id) { 3111 struct i915_gem_context *ctx; 3112 3113 i915_gem_reset_engine(engine, engine->hangcheck.active_request); 3114 ctx = fetch_and_zero(&engine->last_retired_context); 3115 if (ctx) 3116 engine->context_unpin(engine, ctx); 3117 } 3118 3119 i915_gem_restore_fences(dev_priv); 3120 3121 if (dev_priv->gt.awake) { 3122 intel_sanitize_gt_powersave(dev_priv); 3123 intel_enable_gt_powersave(dev_priv); 3124 if (INTEL_GEN(dev_priv) >= 6) 3125 gen6_rps_busy(dev_priv); 3126 } 3127 } 3128 3129 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3130 { 3131 tasklet_enable(&engine->execlists.tasklet); 3132 kthread_unpark(engine->breadcrumbs.signaler); 3133 3134 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3135 } 3136 3137 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3138 { 3139 struct intel_engine_cs *engine; 3140 enum intel_engine_id id; 3141 3142 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3143 3144 for_each_engine(engine, dev_priv, id) { 3145 engine->hangcheck.active_request = NULL; 3146 i915_gem_reset_finish_engine(engine); 3147 } 3148 } 3149 3150 static void nop_submit_request(struct drm_i915_gem_request *request) 3151 { 3152 dma_fence_set_error(&request->fence, -EIO); 3153 3154 i915_gem_request_submit(request); 3155 } 3156 3157 static void nop_complete_submit_request(struct drm_i915_gem_request *request) 3158 { 3159 unsigned long flags; 3160 3161 dma_fence_set_error(&request->fence, -EIO); 3162 3163 spin_lock_irqsave(&request->engine->timeline->lock, flags); 3164 __i915_gem_request_submit(request); 3165 
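	/*
	 * Descriptive note: fast-forwarding the global seqno to this request
	 * below is what makes it read as completed to any waiters; see also
	 * the matching comment in i915_gem_set_wedged().
	 */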
intel_engine_init_global_seqno(request->engine, request->global_seqno); 3166 spin_unlock_irqrestore(&request->engine->timeline->lock, flags); 3167 } 3168 3169 void i915_gem_set_wedged(struct drm_i915_private *i915) 3170 { 3171 struct intel_engine_cs *engine; 3172 enum intel_engine_id id; 3173 3174 /* 3175 * First, stop submission to hw, but do not yet complete requests by 3176 * rolling the global seqno forward (since this would complete requests 3177 * for which we haven't set the fence error to EIO yet). 3178 */ 3179 for_each_engine(engine, i915, id) 3180 engine->submit_request = nop_submit_request; 3181 3182 /* 3183 * Make sure no one is running the old callback before we proceed with 3184 * cancelling requests and resetting the completion tracking. Otherwise 3185 * we might submit a request to the hardware which never completes. 3186 */ 3187 synchronize_rcu(); 3188 3189 for_each_engine(engine, i915, id) { 3190 /* Mark all executing requests as skipped */ 3191 engine->cancel_requests(engine); 3192 3193 /* 3194 * Only once we've force-cancelled all in-flight requests can we 3195 * start to complete all requests. 3196 */ 3197 engine->submit_request = nop_complete_submit_request; 3198 } 3199 3200 /* 3201 * Make sure no request can slip through without getting completed by 3202 * either this call here to intel_engine_init_global_seqno, or the one 3203 * in nop_complete_submit_request. 3204 */ 3205 synchronize_rcu(); 3206 3207 for_each_engine(engine, i915, id) { 3208 unsigned long flags; 3209 3210 /* Mark all pending requests as complete so that any concurrent 3211 * (lockless) lookup doesn't try and wait upon the request as we 3212 * reset it. 3213 */ 3214 spin_lock_irqsave(&engine->timeline->lock, flags); 3215 intel_engine_init_global_seqno(engine, 3216 intel_engine_last_submit(engine)); 3217 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3218 } 3219 3220 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3221 wake_up_all(&i915->gpu_error.reset_queue); 3222 } 3223 3224 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3225 { 3226 struct i915_gem_timeline *tl; 3227 int i; 3228 3229 lockdep_assert_held(&i915->drm.struct_mutex); 3230 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3231 return true; 3232 3233 /* Before unwedging, make sure that all pending operations 3234 * are flushed and errored out - we may have requests waiting upon 3235 * third party fences. We marked all inflight requests as EIO, and 3236 * every execbuf since returned EIO, for consistency we want all 3237 * the currently pending requests to also be marked as EIO, which 3238 * is done inside our nop_submit_request - and so we must wait. 3239 * 3240 * No more can be submitted until we reset the wedged bit. 3241 */ 3242 list_for_each_entry(tl, &i915->gt.timelines, link) { 3243 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3244 struct drm_i915_gem_request *rq; 3245 3246 rq = i915_gem_active_peek(&tl->engine[i].last_request, 3247 &i915->drm.struct_mutex); 3248 if (!rq) 3249 continue; 3250 3251 /* We can't use our normal waiter as we want to 3252 * avoid recursively trying to handle the current 3253 * reset. The basic dma_fence_default_wait() installs 3254 * a callback for dma_fence_signal(), which is 3255 * triggered by our nop handler (indirectly, the 3256 * callback enables the signaler thread which is 3257 * woken by the nop_submit_request() advancing the seqno 3258 * and when the seqno passes the fence, the signaler 3259 * then signals the fence waking us up). 
3260 */ 3261 if (dma_fence_default_wait(&rq->fence, true, 3262 MAX_SCHEDULE_TIMEOUT) < 0) 3263 return false; 3264 } 3265 } 3266 3267 /* Undo nop_submit_request. We prevent all new i915 requests from 3268 * being queued (by disallowing execbuf whilst wedged) so having 3269 * waited for all active requests above, we know the system is idle 3270 * and do not have to worry about a thread being inside 3271 * engine->submit_request() as we swap over. So unlike installing 3272 * the nop_submit_request on reset, we can do this from normal 3273 * context and do not require stop_machine(). 3274 */ 3275 intel_engines_reset_default_submission(i915); 3276 i915_gem_contexts_lost(i915); 3277 3278 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3279 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3280 3281 return true; 3282 } 3283 3284 static void 3285 i915_gem_retire_work_handler(struct work_struct *work) 3286 { 3287 struct drm_i915_private *dev_priv = 3288 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3289 struct drm_device *dev = &dev_priv->drm; 3290 3291 /* Come back later if the device is busy... */ 3292 if (mutex_trylock(&dev->struct_mutex)) { 3293 i915_gem_retire_requests(dev_priv); 3294 mutex_unlock(&dev->struct_mutex); 3295 } 3296 3297 /* Keep the retire handler running until we are finally idle. 3298 * We do not need to do this test under locking as in the worst-case 3299 * we queue the retire worker once too often. 3300 */ 3301 if (READ_ONCE(dev_priv->gt.awake)) { 3302 i915_queue_hangcheck(dev_priv); 3303 queue_delayed_work(dev_priv->wq, 3304 &dev_priv->gt.retire_work, 3305 round_jiffies_up_relative(HZ)); 3306 } 3307 } 3308 3309 static inline bool 3310 new_requests_since_last_retire(const struct drm_i915_private *i915) 3311 { 3312 return (READ_ONCE(i915->gt.active_requests) || 3313 work_pending(&i915->gt.idle_work.work)); 3314 } 3315 3316 static void 3317 i915_gem_idle_work_handler(struct work_struct *work) 3318 { 3319 struct drm_i915_private *dev_priv = 3320 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3321 bool rearm_hangcheck; 3322 ktime_t end; 3323 3324 if (!READ_ONCE(dev_priv->gt.awake)) 3325 return; 3326 3327 /* 3328 * Wait for last execlists context complete, but bail out in case a 3329 * new request is submitted. 3330 */ 3331 end = ktime_add_ms(ktime_get(), 200); 3332 do { 3333 if (new_requests_since_last_retire(dev_priv)) 3334 return; 3335 3336 if (intel_engines_are_idle(dev_priv)) 3337 break; 3338 3339 usleep_range(100, 500); 3340 } while (ktime_before(ktime_get(), end)); 3341 3342 rearm_hangcheck = 3343 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3344 3345 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3346 /* Currently busy, come back later */ 3347 mod_delayed_work(dev_priv->wq, 3348 &dev_priv->gt.idle_work, 3349 msecs_to_jiffies(50)); 3350 goto out_rearm; 3351 } 3352 3353 /* 3354 * New request retired after this work handler started, extend active 3355 * period until next instance of the work. 3356 */ 3357 if (new_requests_since_last_retire(dev_priv)) 3358 goto out_unlock; 3359 3360 /* 3361 * Be paranoid and flush a concurrent interrupt to make sure 3362 * we don't reactivate any irq tasklets after parking. 3363 * 3364 * FIXME: Note that even though we have waited for execlists to be idle, 3365 * there may still be an in-flight interrupt even though the CSB 3366 * is now empty. 
synchronize_irq() makes sure that a residual interrupt 3367 * is completed before we continue, but it doesn't prevent the HW from 3368 * raising a spurious interrupt later. To complete the shield we should 3369 * coordinate disabling the CS irq with flushing the interrupts. 3370 */ 3371 synchronize_irq(dev_priv->drm.irq); 3372 3373 intel_engines_park(dev_priv); 3374 i915_gem_timelines_mark_idle(dev_priv); 3375 3376 GEM_BUG_ON(!dev_priv->gt.awake); 3377 dev_priv->gt.awake = false; 3378 rearm_hangcheck = false; 3379 3380 if (INTEL_GEN(dev_priv) >= 6) 3381 gen6_rps_idle(dev_priv); 3382 intel_runtime_pm_put(dev_priv); 3383 out_unlock: 3384 mutex_unlock(&dev_priv->drm.struct_mutex); 3385 3386 out_rearm: 3387 if (rearm_hangcheck) { 3388 GEM_BUG_ON(!dev_priv->gt.awake); 3389 i915_queue_hangcheck(dev_priv); 3390 } 3391 } 3392 3393 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3394 { 3395 struct drm_i915_private *i915 = to_i915(gem->dev); 3396 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3397 struct drm_i915_file_private *fpriv = file->driver_priv; 3398 struct i915_lut_handle *lut, *ln; 3399 3400 mutex_lock(&i915->drm.struct_mutex); 3401 3402 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3403 struct i915_gem_context *ctx = lut->ctx; 3404 struct i915_vma *vma; 3405 3406 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3407 if (ctx->file_priv != fpriv) 3408 continue; 3409 3410 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3411 GEM_BUG_ON(vma->obj != obj); 3412 3413 /* We allow the process to have multiple handles to the same 3414 * vma, in the same fd namespace, by virtue of flink/open. 3415 */ 3416 GEM_BUG_ON(!vma->open_count); 3417 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3418 i915_vma_close(vma); 3419 3420 list_del(&lut->obj_link); 3421 list_del(&lut->ctx_link); 3422 3423 kmem_cache_free(i915->luts, lut); 3424 __i915_gem_object_release_unless_active(obj); 3425 } 3426 3427 mutex_unlock(&i915->drm.struct_mutex); 3428 } 3429 3430 static unsigned long to_wait_timeout(s64 timeout_ns) 3431 { 3432 if (timeout_ns < 0) 3433 return MAX_SCHEDULE_TIMEOUT; 3434 3435 if (timeout_ns == 0) 3436 return 0; 3437 3438 return nsecs_to_jiffies_timeout(timeout_ns); 3439 } 3440 3441 /** 3442 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3443 * @dev: drm device pointer 3444 * @data: ioctl data blob 3445 * @file: drm file pointer 3446 * 3447 * Returns 0 if successful, else an error is returned with the remaining time in 3448 * the timeout parameter. 3449 * -ETIME: object is still busy after timeout 3450 * -ERESTARTSYS: signal interrupted the wait 3451 * -ENONENT: object doesn't exist 3452 * Also possible, but rare: 3453 * -EAGAIN: incomplete, restart syscall 3454 * -ENOMEM: damn 3455 * -ENODEV: Internal IRQ fail 3456 * -E?: The add request failed 3457 * 3458 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3459 * non-zero timeout parameter the wait ioctl will wait for the given number of 3460 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3461 * without holding struct_mutex the object may become re-busied before this 3462 * function completes. 
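 *
 * Illustrative use from userspace (error handling omitted; a negative
 * timeout_ns waits indefinitely):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = -1,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *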
A similar but shorter race condition exists in the busy ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
{
	int ret, i;

	for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
		ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
		if (ret)
			return ret;
	}

	return 0;
}

static int wait_for_engines(struct drm_i915_private *i915)
{
	if (wait_for(intel_engines_are_idle(i915), 50)) {
		DRM_ERROR("Failed to idle engines, declaring wedged!\n");
		i915_gem_set_wedged(i915);
		return -EIO;
	}

	return 0;
}

int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
{
	int ret;

	/* If the device is asleep, we have no requests outstanding */
	if (!READ_ONCE(i915->gt.awake))
		return 0;

	if (flags & I915_WAIT_LOCKED) {
		struct i915_gem_timeline *tl;

		lockdep_assert_held(&i915->drm.struct_mutex);

		list_for_each_entry(tl, &i915->gt.timelines, link) {
			ret = wait_for_timeline(tl, flags);
			if (ret)
				return ret;
		}

		i915_gem_retire_requests(i915);
		GEM_BUG_ON(i915->gt.active_requests);

		ret = wait_for_engines(i915);
	} else {
		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
	}

	return ret;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->base.write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!READ_ONCE(obj->pin_global))
		return;

	mutex_lock(&obj->base.dev->struct_mutex);
	__i915_gem_object_flush_for_display(obj);
	mutex_unlock(&obj->base.dev->struct_mutex);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
3589 * @obj: object to act on 3590 * @write: ask for write access or read only 3591 * 3592 * This function returns when the move is complete, including waiting on 3593 * flushes to occur. 3594 */ 3595 int 3596 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3597 { 3598 int ret; 3599 3600 lockdep_assert_held(&obj->base.dev->struct_mutex); 3601 3602 ret = i915_gem_object_wait(obj, 3603 I915_WAIT_INTERRUPTIBLE | 3604 I915_WAIT_LOCKED | 3605 (write ? I915_WAIT_ALL : 0), 3606 MAX_SCHEDULE_TIMEOUT, 3607 NULL); 3608 if (ret) 3609 return ret; 3610 3611 if (obj->base.write_domain == I915_GEM_DOMAIN_WC) 3612 return 0; 3613 3614 /* Flush and acquire obj->pages so that we are coherent through 3615 * direct access in memory with previous cached writes through 3616 * shmemfs and that our cache domain tracking remains valid. 3617 * For example, if the obj->filp was moved to swap without us 3618 * being notified and releasing the pages, we would mistakenly 3619 * continue to assume that the obj remained out of the CPU cached 3620 * domain. 3621 */ 3622 ret = i915_gem_object_pin_pages(obj); 3623 if (ret) 3624 return ret; 3625 3626 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3627 3628 /* Serialise direct access to this object with the barriers for 3629 * coherent writes from the GPU, by effectively invalidating the 3630 * WC domain upon first access. 3631 */ 3632 if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) 3633 mb(); 3634 3635 /* It should now be out of any other write domains, and we can update 3636 * the domain values for our changes. 3637 */ 3638 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3639 obj->base.read_domains |= I915_GEM_DOMAIN_WC; 3640 if (write) { 3641 obj->base.read_domains = I915_GEM_DOMAIN_WC; 3642 obj->base.write_domain = I915_GEM_DOMAIN_WC; 3643 obj->mm.dirty = true; 3644 } 3645 3646 i915_gem_object_unpin_pages(obj); 3647 return 0; 3648 } 3649 3650 /** 3651 * Moves a single object to the GTT read, and possibly write domain. 3652 * @obj: object to act on 3653 * @write: ask for write access or read only 3654 * 3655 * This function returns when the move is complete, including waiting on 3656 * flushes to occur. 3657 */ 3658 int 3659 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3660 { 3661 int ret; 3662 3663 lockdep_assert_held(&obj->base.dev->struct_mutex); 3664 3665 ret = i915_gem_object_wait(obj, 3666 I915_WAIT_INTERRUPTIBLE | 3667 I915_WAIT_LOCKED | 3668 (write ? I915_WAIT_ALL : 0), 3669 MAX_SCHEDULE_TIMEOUT, 3670 NULL); 3671 if (ret) 3672 return ret; 3673 3674 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3675 return 0; 3676 3677 /* Flush and acquire obj->pages so that we are coherent through 3678 * direct access in memory with previous cached writes through 3679 * shmemfs and that our cache domain tracking remains valid. 3680 * For example, if the obj->filp was moved to swap without us 3681 * being notified and releasing the pages, we would mistakenly 3682 * continue to assume that the obj remained out of the CPU cached 3683 * domain. 3684 */ 3685 ret = i915_gem_object_pin_pages(obj); 3686 if (ret) 3687 return ret; 3688 3689 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3690 3691 /* Serialise direct access to this object with the barriers for 3692 * coherent writes from the GPU, by effectively invalidating the 3693 * GTT domain upon first access. 
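	 *
	 * (Aside: the mb() below is therefore only issued when the GTT read
	 * domain was not already set, i.e. on that first access.)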
 */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (obj->cache_level == cache_level)
		return 0;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (i915_gem_valid_gtt_space(vma, cache_level))
			continue;

		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;

		/* As unbinding may affect other elements in the
		 * obj->vma_list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon less
	 * state and so involves less work.
	 */
	if (obj->bind_count) {
		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
3779 */ 3780 ret = i915_gem_object_wait(obj, 3781 I915_WAIT_INTERRUPTIBLE | 3782 I915_WAIT_LOCKED | 3783 I915_WAIT_ALL, 3784 MAX_SCHEDULE_TIMEOUT, 3785 NULL); 3786 if (ret) 3787 return ret; 3788 3789 if (!HAS_LLC(to_i915(obj->base.dev)) && 3790 cache_level != I915_CACHE_NONE) { 3791 /* Access to snoopable pages through the GTT is 3792 * incoherent and on some machines causes a hard 3793 * lockup. Relinquish the CPU mmaping to force 3794 * userspace to refault in the pages and we can 3795 * then double check if the GTT mapping is still 3796 * valid for that pointer access. 3797 */ 3798 i915_gem_release_mmap(obj); 3799 3800 /* As we no longer need a fence for GTT access, 3801 * we can relinquish it now (and so prevent having 3802 * to steal a fence from someone else on the next 3803 * fence request). Note GPU activity would have 3804 * dropped the fence as all snoopable access is 3805 * supposed to be linear. 3806 */ 3807 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3808 ret = i915_vma_put_fence(vma); 3809 if (ret) 3810 return ret; 3811 } 3812 } else { 3813 /* We either have incoherent backing store and 3814 * so no GTT access or the architecture is fully 3815 * coherent. In such cases, existing GTT mmaps 3816 * ignore the cache bit in the PTE and we can 3817 * rewrite it without confusing the GPU or having 3818 * to force userspace to fault back in its mmaps. 3819 */ 3820 } 3821 3822 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3823 if (!drm_mm_node_allocated(&vma->node)) 3824 continue; 3825 3826 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3827 if (ret) 3828 return ret; 3829 } 3830 } 3831 3832 list_for_each_entry(vma, &obj->vma_list, obj_link) 3833 vma->node.color = cache_level; 3834 i915_gem_object_set_cache_coherency(obj, cache_level); 3835 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3836 3837 return 0; 3838 } 3839 3840 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3841 struct drm_file *file) 3842 { 3843 struct drm_i915_gem_caching *args = data; 3844 struct drm_i915_gem_object *obj; 3845 int err = 0; 3846 3847 rcu_read_lock(); 3848 obj = i915_gem_object_lookup_rcu(file, args->handle); 3849 if (!obj) { 3850 err = -ENOENT; 3851 goto out; 3852 } 3853 3854 switch (obj->cache_level) { 3855 case I915_CACHE_LLC: 3856 case I915_CACHE_L3_LLC: 3857 args->caching = I915_CACHING_CACHED; 3858 break; 3859 3860 case I915_CACHE_WT: 3861 args->caching = I915_CACHING_DISPLAY; 3862 break; 3863 3864 default: 3865 args->caching = I915_CACHING_NONE; 3866 break; 3867 } 3868 out: 3869 rcu_read_unlock(); 3870 return err; 3871 } 3872 3873 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3874 struct drm_file *file) 3875 { 3876 struct drm_i915_private *i915 = to_i915(dev); 3877 struct drm_i915_gem_caching *args = data; 3878 struct drm_i915_gem_object *obj; 3879 enum i915_cache_level level; 3880 int ret = 0; 3881 3882 switch (args->caching) { 3883 case I915_CACHING_NONE: 3884 level = I915_CACHE_NONE; 3885 break; 3886 case I915_CACHING_CACHED: 3887 /* 3888 * Due to a HW issue on BXT A stepping, GPU stores via a 3889 * snooped mapping may leave stale data in a corresponding CPU 3890 * cacheline, whereas normally such cachelines would get 3891 * invalidated. 3892 */ 3893 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 3894 return -ENODEV; 3895 3896 level = I915_CACHE_LLC; 3897 break; 3898 case I915_CACHING_DISPLAY: 3899 level = HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE; 3900 break; 3901 default: 3902 return -EINVAL; 3903 } 3904 3905 obj = i915_gem_object_lookup(file, args->handle); 3906 if (!obj) 3907 return -ENOENT; 3908 3909 /* 3910 * The caching mode of proxy object is handled by its generator, and 3911 * not allowed to be changed by userspace. 3912 */ 3913 if (i915_gem_object_is_proxy(obj)) { 3914 ret = -ENXIO; 3915 goto out; 3916 } 3917 3918 if (obj->cache_level == level) 3919 goto out; 3920 3921 ret = i915_gem_object_wait(obj, 3922 I915_WAIT_INTERRUPTIBLE, 3923 MAX_SCHEDULE_TIMEOUT, 3924 to_rps_client(file)); 3925 if (ret) 3926 goto out; 3927 3928 ret = i915_mutex_lock_interruptible(dev); 3929 if (ret) 3930 goto out; 3931 3932 ret = i915_gem_object_set_cache_level(obj, level); 3933 mutex_unlock(&dev->struct_mutex); 3934 3935 out: 3936 i915_gem_object_put(obj); 3937 return ret; 3938 } 3939 3940 /* 3941 * Prepare buffer for display plane (scanout, cursors, etc). 3942 * Can be called from an uninterruptible phase (modesetting) and allows 3943 * any flushes to be pipelined (for pageflips). 3944 */ 3945 struct i915_vma * 3946 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3947 u32 alignment, 3948 const struct i915_ggtt_view *view) 3949 { 3950 struct i915_vma *vma; 3951 int ret; 3952 3953 lockdep_assert_held(&obj->base.dev->struct_mutex); 3954 3955 /* Mark the global pin early so that we account for the 3956 * display coherency whilst setting up the cache domains. 3957 */ 3958 obj->pin_global++; 3959 3960 /* The display engine is not coherent with the LLC cache on gen6. As 3961 * a result, we make sure that the pinning that is about to occur is 3962 * done with uncached PTEs. This is lowest common denominator for all 3963 * chipsets. 3964 * 3965 * However for gen6+, we could do better by using the GFDT bit instead 3966 * of uncaching, which would allow us to flush all the LLC-cached data 3967 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3968 */ 3969 ret = i915_gem_object_set_cache_level(obj, 3970 HAS_WT(to_i915(obj->base.dev)) ? 3971 I915_CACHE_WT : I915_CACHE_NONE); 3972 if (ret) { 3973 vma = ERR_PTR(ret); 3974 goto err_unpin_global; 3975 } 3976 3977 /* As the user may map the buffer once pinned in the display plane 3978 * (e.g. libkms for the bootup splash), we have to ensure that we 3979 * always use map_and_fenceable for all scanout buffers. However, 3980 * it may simply be too big to fit into mappable, in which case 3981 * put it anyway and hope that userspace can cope (but always first 3982 * try to preserve the existing ABI). 3983 */ 3984 vma = ERR_PTR(-ENOSPC); 3985 if (!view || view->type == I915_GGTT_VIEW_NORMAL) 3986 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3987 PIN_MAPPABLE | PIN_NONBLOCK); 3988 if (IS_ERR(vma)) { 3989 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3990 unsigned int flags; 3991 3992 /* Valleyview is definitely limited to scanning out the first 3993 * 512MiB. Lets presume this behaviour was inherited from the 3994 * g4x display engine and that all earlier gen are similarly 3995 * limited. Testing suggests that it is a little more 3996 * complicated than this. For example, Cherryview appears quite 3997 * happy to scanout from anywhere within its global aperture. 
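 *
 * (So the fallback below only insists on PIN_MAPPABLE for GMCH-style
 *  display engines, and otherwise lets the scanout VMA land anywhere
 *  in the global GTT.)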
3998 */ 3999 flags = 0; 4000 if (HAS_GMCH_DISPLAY(i915)) 4001 flags = PIN_MAPPABLE; 4002 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4003 } 4004 if (IS_ERR(vma)) 4005 goto err_unpin_global; 4006 4007 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4008 4009 /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ 4010 __i915_gem_object_flush_for_display(obj); 4011 intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); 4012 4013 /* It should now be out of any other write domains, and we can update 4014 * the domain values for our changes. 4015 */ 4016 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4017 4018 return vma; 4019 4020 err_unpin_global: 4021 obj->pin_global--; 4022 return vma; 4023 } 4024 4025 void 4026 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4027 { 4028 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4029 4030 if (WARN_ON(vma->obj->pin_global == 0)) 4031 return; 4032 4033 if (--vma->obj->pin_global == 0) 4034 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4035 4036 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4037 i915_gem_object_bump_inactive_ggtt(vma->obj); 4038 4039 i915_vma_unpin(vma); 4040 } 4041 4042 /** 4043 * Moves a single object to the CPU read, and possibly write domain. 4044 * @obj: object to act on 4045 * @write: requesting write or read-only access 4046 * 4047 * This function returns when the move is complete, including waiting on 4048 * flushes to occur. 4049 */ 4050 int 4051 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4052 { 4053 int ret; 4054 4055 lockdep_assert_held(&obj->base.dev->struct_mutex); 4056 4057 ret = i915_gem_object_wait(obj, 4058 I915_WAIT_INTERRUPTIBLE | 4059 I915_WAIT_LOCKED | 4060 (write ? I915_WAIT_ALL : 0), 4061 MAX_SCHEDULE_TIMEOUT, 4062 NULL); 4063 if (ret) 4064 return ret; 4065 4066 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4067 4068 /* Flush the CPU cache if it's still invalid. */ 4069 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4070 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4071 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4072 } 4073 4074 /* It should now be out of any other write domains, and we can update 4075 * the domain values for our changes. 4076 */ 4077 GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 4078 4079 /* If we're writing through the CPU, then the GPU read domains will 4080 * need to be invalidated at next use. 4081 */ 4082 if (write) 4083 __start_cpu_write(obj); 4084 4085 return 0; 4086 } 4087 4088 /* Throttle our rendering by waiting until the ring has completed our requests 4089 * emitted over 20 msec ago. 4090 * 4091 * Note that if we were to use the current jiffies each time around the loop, 4092 * we wouldn't escape the function with any frames outstanding if the time to 4093 * render a frame was over 20ms. 4094 * 4095 * This should get us reasonable parallelism between CPU and GPU but also 4096 * relatively low latency when blocking on a particular request to finish. 
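 *
 * (A worked example of the selection below, with purely illustrative
 *  numbers: if this client emitted requests at t = 0, 5, 18 and 25 ms
 *  and the ioctl arrives at t = 30 ms, then recent_enough is t = 10 ms,
 *  the scan stops at the t = 18 ms request, and we wait on the request
 *  from t = 5 ms, i.e. the newest one already more than 20 ms old.)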
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
	struct drm_i915_gem_request *request, *target = NULL;
	long ret;

	/* ABI: return -EIO if already wedged */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return -EIO;

	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;

		if (target) {
			list_del(&target->client_link);
			target->file_priv = NULL;
		}

		target = request;
	}
	if (target)
		i915_gem_request_get(target);
	spin_unlock(&file_priv->mm.lock);

	if (target == NULL)
		return 0;

	ret = i915_wait_request(target,
				I915_WAIT_INTERRUPTIBLE,
				MAX_SCHEDULE_TIMEOUT);
	i915_gem_request_put(target);

	return ret < 0 ? ret : 0;
}

struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_address_space *vm = &dev_priv->ggtt.base;
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!view && flags & PIN_MAPPABLE) {
		/* If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
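		 *
		 * (Illustrative numbers only: with a 256 MiB mappable
		 * aperture, a PIN_MAPPABLE | PIN_NONBLOCK request for an
		 * object larger than 128 MiB is rejected with -ENOSPC
		 * below, leaving the caller to use its non-mappable
		 * fallback rather than have us evict half the aperture.)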
4177 */ 4178 if (flags & PIN_NONBLOCK && 4179 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4180 return ERR_PTR(-ENOSPC); 4181 } 4182 4183 vma = i915_vma_instance(obj, vm, view); 4184 if (unlikely(IS_ERR(vma))) 4185 return vma; 4186 4187 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4188 if (flags & PIN_NONBLOCK) { 4189 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4190 return ERR_PTR(-ENOSPC); 4191 4192 if (flags & PIN_MAPPABLE && 4193 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4194 return ERR_PTR(-ENOSPC); 4195 } 4196 4197 WARN(i915_vma_is_pinned(vma), 4198 "bo is already pinned in ggtt with incorrect alignment:" 4199 " offset=%08x, req.alignment=%llx," 4200 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4201 i915_ggtt_offset(vma), alignment, 4202 !!(flags & PIN_MAPPABLE), 4203 i915_vma_is_map_and_fenceable(vma)); 4204 ret = i915_vma_unbind(vma); 4205 if (ret) 4206 return ERR_PTR(ret); 4207 } 4208 4209 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4210 if (ret) 4211 return ERR_PTR(ret); 4212 4213 return vma; 4214 } 4215 4216 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4217 { 4218 /* Note that we could alias engines in the execbuf API, but 4219 * that would be very unwise as it prevents userspace from 4220 * fine control over engine selection. Ahem. 4221 * 4222 * This should be something like EXEC_MAX_ENGINE instead of 4223 * I915_NUM_ENGINES. 4224 */ 4225 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4226 return 0x10000 << id; 4227 } 4228 4229 static __always_inline unsigned int __busy_write_id(unsigned int id) 4230 { 4231 /* The uABI guarantees an active writer is also amongst the read 4232 * engines. This would be true if we accessed the activity tracking 4233 * under the lock, but as we perform the lookup of the object and 4234 * its activity locklessly we can not guarantee that the last_write 4235 * being active implies that we have set the same engine flag from 4236 * last_read - hence we always set both read and write busy for 4237 * last_write. 4238 */ 4239 return id | __busy_read_flag(id); 4240 } 4241 4242 static __always_inline unsigned int 4243 __busy_set_if_active(const struct dma_fence *fence, 4244 unsigned int (*flag)(unsigned int id)) 4245 { 4246 struct drm_i915_gem_request *rq; 4247 4248 /* We have to check the current hw status of the fence as the uABI 4249 * guarantees forward progress. We could rely on the idle worker 4250 * to eventually flush us, but to minimise latency just ask the 4251 * hardware. 4252 * 4253 * Note we only report on the status of native fences. 
4254 */ 4255 if (!dma_fence_is_i915(fence)) 4256 return 0; 4257 4258 /* opencode to_request() in order to avoid const warnings */ 4259 rq = container_of(fence, struct drm_i915_gem_request, fence); 4260 if (i915_gem_request_completed(rq)) 4261 return 0; 4262 4263 return flag(rq->engine->uabi_id); 4264 } 4265 4266 static __always_inline unsigned int 4267 busy_check_reader(const struct dma_fence *fence) 4268 { 4269 return __busy_set_if_active(fence, __busy_read_flag); 4270 } 4271 4272 static __always_inline unsigned int 4273 busy_check_writer(const struct dma_fence *fence) 4274 { 4275 if (!fence) 4276 return 0; 4277 4278 return __busy_set_if_active(fence, __busy_write_id); 4279 } 4280 4281 int 4282 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4283 struct drm_file *file) 4284 { 4285 struct drm_i915_gem_busy *args = data; 4286 struct drm_i915_gem_object *obj; 4287 struct reservation_object_list *list; 4288 unsigned int seq; 4289 int err; 4290 4291 err = -ENOENT; 4292 rcu_read_lock(); 4293 obj = i915_gem_object_lookup_rcu(file, args->handle); 4294 if (!obj) 4295 goto out; 4296 4297 /* A discrepancy here is that we do not report the status of 4298 * non-i915 fences, i.e. even though we may report the object as idle, 4299 * a call to set-domain may still stall waiting for foreign rendering. 4300 * This also means that wait-ioctl may report an object as busy, 4301 * where busy-ioctl considers it idle. 4302 * 4303 * We trade the ability to warn of foreign fences to report on which 4304 * i915 engines are active for the object. 4305 * 4306 * Alternatively, we can trade that extra information on read/write 4307 * activity with 4308 * args->busy = 4309 * !reservation_object_test_signaled_rcu(obj->resv, true); 4310 * to report the overall busyness. This is what the wait-ioctl does. 
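 *
 * (The retry loop below is, roughly, a seqlock-style read: snapshot
 *  obj->resv->seq, sample the exclusive and shared fences under RCU,
 *  and start over if the reservation was modified while we were
 *  looking; we only bother to retry when about to report busy.)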
4311 * 4312 */ 4313 retry: 4314 seq = raw_read_seqcount(&obj->resv->seq); 4315 4316 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4317 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4318 4319 /* Translate shared fences to READ set of engines */ 4320 list = rcu_dereference(obj->resv->fence); 4321 if (list) { 4322 unsigned int shared_count = list->shared_count, i; 4323 4324 for (i = 0; i < shared_count; ++i) { 4325 struct dma_fence *fence = 4326 rcu_dereference(list->shared[i]); 4327 4328 args->busy |= busy_check_reader(fence); 4329 } 4330 } 4331 4332 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4333 goto retry; 4334 4335 err = 0; 4336 out: 4337 rcu_read_unlock(); 4338 return err; 4339 } 4340 4341 int 4342 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4343 struct drm_file *file_priv) 4344 { 4345 return i915_gem_ring_throttle(dev, file_priv); 4346 } 4347 4348 int 4349 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4350 struct drm_file *file_priv) 4351 { 4352 struct drm_i915_private *dev_priv = to_i915(dev); 4353 struct drm_i915_gem_madvise *args = data; 4354 struct drm_i915_gem_object *obj; 4355 int err; 4356 4357 switch (args->madv) { 4358 case I915_MADV_DONTNEED: 4359 case I915_MADV_WILLNEED: 4360 break; 4361 default: 4362 return -EINVAL; 4363 } 4364 4365 obj = i915_gem_object_lookup(file_priv, args->handle); 4366 if (!obj) 4367 return -ENOENT; 4368 4369 err = mutex_lock_interruptible(&obj->mm.lock); 4370 if (err) 4371 goto out; 4372 4373 if (i915_gem_object_has_pages(obj) && 4374 i915_gem_object_is_tiled(obj) && 4375 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4376 if (obj->mm.madv == I915_MADV_WILLNEED) { 4377 GEM_BUG_ON(!obj->mm.quirked); 4378 __i915_gem_object_unpin_pages(obj); 4379 obj->mm.quirked = false; 4380 } 4381 if (args->madv == I915_MADV_WILLNEED) { 4382 GEM_BUG_ON(obj->mm.quirked); 4383 __i915_gem_object_pin_pages(obj); 4384 obj->mm.quirked = true; 4385 } 4386 } 4387 4388 if (obj->mm.madv != __I915_MADV_PURGED) 4389 obj->mm.madv = args->madv; 4390 4391 /* if the object is no longer attached, discard its backing storage */ 4392 if (obj->mm.madv == I915_MADV_DONTNEED && 4393 !i915_gem_object_has_pages(obj)) 4394 i915_gem_object_truncate(obj); 4395 4396 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4397 mutex_unlock(&obj->mm.lock); 4398 4399 out: 4400 i915_gem_object_put(obj); 4401 return err; 4402 } 4403 4404 static void 4405 frontbuffer_retire(struct i915_gem_active *active, 4406 struct drm_i915_gem_request *request) 4407 { 4408 struct drm_i915_gem_object *obj = 4409 container_of(active, typeof(*obj), frontbuffer_write); 4410 4411 intel_fb_obj_flush(obj, ORIGIN_CS); 4412 } 4413 4414 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4415 const struct drm_i915_gem_object_ops *ops) 4416 { 4417 mutex_init(&obj->mm.lock); 4418 4419 INIT_LIST_HEAD(&obj->vma_list); 4420 INIT_LIST_HEAD(&obj->lut_list); 4421 INIT_LIST_HEAD(&obj->batch_pool_link); 4422 4423 obj->ops = ops; 4424 4425 reservation_object_init(&obj->__builtin_resv); 4426 obj->resv = &obj->__builtin_resv; 4427 4428 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4429 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4430 4431 obj->mm.madv = I915_MADV_WILLNEED; 4432 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4433 mutex_init(&obj->mm.get_page.lock); 4434 4435 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4436 } 4437 4438 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4439 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4440 I915_GEM_OBJECT_IS_SHRINKABLE, 4441 4442 .get_pages = i915_gem_object_get_pages_gtt, 4443 .put_pages = i915_gem_object_put_pages_gtt, 4444 4445 .pwrite = i915_gem_object_pwrite_gtt, 4446 }; 4447 4448 static int i915_gem_object_create_shmem(struct drm_device *dev, 4449 struct drm_gem_object *obj, 4450 size_t size) 4451 { 4452 struct drm_i915_private *i915 = to_i915(dev); 4453 unsigned long flags = VM_NORESERVE; 4454 struct file *filp; 4455 4456 drm_gem_private_object_init(dev, obj, size); 4457 4458 if (i915->mm.gemfs) 4459 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4460 flags); 4461 else 4462 filp = shmem_file_setup("i915", size, flags); 4463 4464 if (IS_ERR(filp)) 4465 return PTR_ERR(filp); 4466 4467 obj->filp = filp; 4468 4469 return 0; 4470 } 4471 4472 struct drm_i915_gem_object * 4473 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4474 { 4475 struct drm_i915_gem_object *obj; 4476 struct address_space *mapping; 4477 unsigned int cache_level; 4478 gfp_t mask; 4479 int ret; 4480 4481 /* There is a prevalence of the assumption that we fit the object's 4482 * page count inside a 32bit _signed_ variable. Let's document this and 4483 * catch if we ever need to fix it. In the meantime, if you do spot 4484 * such a local variable, please consider fixing! 4485 */ 4486 if (size >> PAGE_SHIFT > INT_MAX) 4487 return ERR_PTR(-E2BIG); 4488 4489 if (overflows_type(size, obj->base.size)) 4490 return ERR_PTR(-E2BIG); 4491 4492 obj = i915_gem_object_alloc(dev_priv); 4493 if (obj == NULL) 4494 return ERR_PTR(-ENOMEM); 4495 4496 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4497 if (ret) 4498 goto fail; 4499 4500 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4501 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4502 /* 965gm cannot relocate objects above 4GiB. */ 4503 mask &= ~__GFP_HIGHMEM; 4504 mask |= __GFP_DMA32; 4505 } 4506 4507 mapping = obj->base.filp->f_mapping; 4508 mapping_set_gfp_mask(mapping, mask); 4509 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4510 4511 i915_gem_object_init(obj, &i915_gem_object_ops); 4512 4513 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4514 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4515 4516 if (HAS_LLC(dev_priv)) 4517 /* On some devices, we can have the GPU use the LLC (the CPU 4518 * cache) for about a 10% performance improvement 4519 * compared to uncached. Graphics requests other than 4520 * display scanout are coherent with the CPU in 4521 * accessing this cache. This means in this mode we 4522 * don't need to clflush on the CPU side, and on the 4523 * GPU side we only need to flush internal caches to 4524 * get data visible to the CPU. 4525 * 4526 * However, we maintain the display planes as UC, and so 4527 * need to rebind when first used as such. 4528 */ 4529 cache_level = I915_CACHE_LLC; 4530 else 4531 cache_level = I915_CACHE_NONE; 4532 4533 i915_gem_object_set_cache_coherency(obj, cache_level); 4534 4535 trace_i915_gem_object_create(obj); 4536 4537 return obj; 4538 4539 fail: 4540 i915_gem_object_free(obj); 4541 return ERR_PTR(ret); 4542 } 4543 4544 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4545 { 4546 /* If we are the last user of the backing storage (be it shmemfs 4547 * pages or stolen etc), we know that the pages are going to be 4548 * immediately released. In this case, we can then skip copying 4549 * back the contents from the GPU. 
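 *
 * (The f_count test below is that "last user" check: if our reference
 *  through obj->base.filp is the only one outstanding, no-one else can
 *  hold a mapping of the shmemfs file, so there is little point in
 *  writing the pages back before we drop them.)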
4550 */ 4551 4552 if (obj->mm.madv != I915_MADV_WILLNEED) 4553 return false; 4554 4555 if (obj->base.filp == NULL) 4556 return true; 4557 4558 /* At first glance, this looks racy, but then again so would be 4559 * userspace racing mmap against close. However, the first external 4560 * reference to the filp can only be obtained through the 4561 * i915_gem_mmap_ioctl() which safeguards us against the user 4562 * acquiring such a reference whilst we are in the middle of 4563 * freeing the object. 4564 */ 4565 return atomic_long_read(&obj->base.filp->f_count) == 1; 4566 } 4567 4568 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4569 struct llist_node *freed) 4570 { 4571 struct drm_i915_gem_object *obj, *on; 4572 4573 intel_runtime_pm_get(i915); 4574 llist_for_each_entry_safe(obj, on, freed, freed) { 4575 struct i915_vma *vma, *vn; 4576 4577 trace_i915_gem_object_destroy(obj); 4578 4579 mutex_lock(&i915->drm.struct_mutex); 4580 4581 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4582 list_for_each_entry_safe(vma, vn, 4583 &obj->vma_list, obj_link) { 4584 GEM_BUG_ON(i915_vma_is_active(vma)); 4585 vma->flags &= ~I915_VMA_PIN_MASK; 4586 i915_vma_close(vma); 4587 } 4588 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4589 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4590 4591 /* This serializes freeing with the shrinker. Since the free 4592 * is delayed, first by RCU then by the workqueue, we want the 4593 * shrinker to be able to free pages of unreferenced objects, 4594 * or else we may oom whilst there are plenty of deferred 4595 * freed objects. 4596 */ 4597 if (i915_gem_object_has_pages(obj)) { 4598 spin_lock(&i915->mm.obj_lock); 4599 list_del_init(&obj->mm.link); 4600 spin_unlock(&i915->mm.obj_lock); 4601 } 4602 4603 mutex_unlock(&i915->drm.struct_mutex); 4604 4605 GEM_BUG_ON(obj->bind_count); 4606 GEM_BUG_ON(obj->userfault_count); 4607 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4608 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4609 4610 if (obj->ops->release) 4611 obj->ops->release(obj); 4612 4613 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4614 atomic_set(&obj->mm.pages_pin_count, 0); 4615 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4616 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4617 4618 if (obj->base.import_attach) 4619 drm_prime_gem_destroy(&obj->base, NULL); 4620 4621 reservation_object_fini(&obj->__builtin_resv); 4622 drm_gem_object_release(&obj->base); 4623 i915_gem_info_remove_obj(i915, obj->base.size); 4624 4625 kfree(obj->bit_17); 4626 i915_gem_object_free(obj); 4627 4628 if (on) 4629 cond_resched(); 4630 } 4631 intel_runtime_pm_put(i915); 4632 } 4633 4634 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4635 { 4636 struct llist_node *freed; 4637 4638 /* Free the oldest, most stale object to keep the free_list short */ 4639 freed = NULL; 4640 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4641 /* Only one consumer of llist_del_first() allowed */ 4642 spin_lock(&i915->mm.free_lock); 4643 freed = llist_del_first(&i915->mm.free_list); 4644 spin_unlock(&i915->mm.free_lock); 4645 } 4646 if (unlikely(freed)) { 4647 freed->next = NULL; 4648 __i915_gem_free_objects(i915, freed); 4649 } 4650 } 4651 4652 static void __i915_gem_free_work(struct work_struct *work) 4653 { 4654 struct drm_i915_private *i915 = 4655 container_of(work, struct drm_i915_private, mm.free_work); 4656 struct llist_node *freed; 4657 4658 /* All file-owned VMA should have been released by this point through 4659 * i915_gem_close_object(), or earlier by 
i915_gem_context_close(). 4660 * However, the object may also be bound into the global GTT (e.g. 4661 * older GPUs without per-process support, or for direct access through 4662 * the GTT either for the user or for scanout). Those VMA still need to 4663 * unbound now. 4664 */ 4665 4666 spin_lock(&i915->mm.free_lock); 4667 while ((freed = llist_del_all(&i915->mm.free_list))) { 4668 spin_unlock(&i915->mm.free_lock); 4669 4670 __i915_gem_free_objects(i915, freed); 4671 if (need_resched()) 4672 return; 4673 4674 spin_lock(&i915->mm.free_lock); 4675 } 4676 spin_unlock(&i915->mm.free_lock); 4677 } 4678 4679 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4680 { 4681 struct drm_i915_gem_object *obj = 4682 container_of(head, typeof(*obj), rcu); 4683 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4684 4685 /* We can't simply use call_rcu() from i915_gem_free_object() 4686 * as we need to block whilst unbinding, and the call_rcu 4687 * task may be called from softirq context. So we take a 4688 * detour through a worker. 4689 */ 4690 if (llist_add(&obj->freed, &i915->mm.free_list)) 4691 schedule_work(&i915->mm.free_work); 4692 } 4693 4694 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4695 { 4696 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4697 4698 if (obj->mm.quirked) 4699 __i915_gem_object_unpin_pages(obj); 4700 4701 if (discard_backing_storage(obj)) 4702 obj->mm.madv = I915_MADV_DONTNEED; 4703 4704 /* Before we free the object, make sure any pure RCU-only 4705 * read-side critical sections are complete, e.g. 4706 * i915_gem_busy_ioctl(). For the corresponding synchronized 4707 * lookup see i915_gem_object_lookup_rcu(). 4708 */ 4709 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4710 } 4711 4712 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4713 { 4714 lockdep_assert_held(&obj->base.dev->struct_mutex); 4715 4716 if (!i915_gem_object_has_active_reference(obj) && 4717 i915_gem_object_is_active(obj)) 4718 i915_gem_object_set_active_reference(obj); 4719 else 4720 i915_gem_object_put(obj); 4721 } 4722 4723 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 4724 { 4725 struct i915_gem_context *kernel_context = i915->kernel_context; 4726 struct intel_engine_cs *engine; 4727 enum intel_engine_id id; 4728 4729 for_each_engine(engine, i915, id) { 4730 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); 4731 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4732 } 4733 } 4734 4735 void i915_gem_sanitize(struct drm_i915_private *i915) 4736 { 4737 if (i915_terminally_wedged(&i915->gpu_error)) { 4738 mutex_lock(&i915->drm.struct_mutex); 4739 i915_gem_unset_wedged(i915); 4740 mutex_unlock(&i915->drm.struct_mutex); 4741 } 4742 4743 /* 4744 * If we inherit context state from the BIOS or earlier occupants 4745 * of the GPU, the GPU may be in an inconsistent state when we 4746 * try to take over. The only way to remove the earlier state 4747 * is by resetting. However, resetting on earlier gen is tricky as 4748 * it may impact the display and we are uncertain about the stability 4749 * of the reset, so this could be applied to even earlier gen. 
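 *
 * (Hence the INTEL_GEN() >= 5 check just below: we only attempt the
 *  sanitising reset where we expect it to be safe, and tolerate
 *  -ENODEV from intel_gpu_reset() on parts that cannot reset at all.)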
 */
	if (INTEL_GEN(i915) >= 5) {
		int reset = intel_gpu_reset(i915, ALL_ENGINES);
		WARN_ON(reset && reset != -ENODEV);
	}
}

int i915_gem_suspend(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = &dev_priv->drm;
	int ret;

	intel_runtime_pm_get(dev_priv);
	intel_suspend_gt_powersave(dev_priv);

	mutex_lock(&dev->struct_mutex);

	/* We have to flush all the executing contexts to main memory so
	 * that they can be saved in the hibernation image. To ensure the
	 * last context image is coherent, we have to switch away from it.
	 * That leaves the dev_priv->kernel_context still active when
	 * we actually suspend, and its image in memory may not match the GPU
	 * state. Fortunately, the kernel_context is disposable and we do
	 * not rely on its state.
	 */
	ret = i915_gem_switch_to_kernel_context(dev_priv);
	if (ret)
		goto err_unlock;

	ret = i915_gem_wait_for_idle(dev_priv,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED);
	if (ret && ret != -EIO)
		goto err_unlock;

	assert_kernel_context_is_current(dev_priv);
	i915_gem_contexts_lost(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	intel_guc_suspend(dev_priv);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);

	/* As the idle_work is rearming if it detects a race, play safe and
	 * repeat the flush until it is definitely idle.
	 */
	drain_delayed_work(&dev_priv->gt.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->gt.awake);
	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */

	/*
	 * Neither the BIOS, ourselves nor any other kernel
	 * expects the system to be in execlists mode on startup,
	 * so we need to reset the GPU back to legacy mode. And the only
	 * known way to disable logical contexts is through a GPU reset.
	 *
	 * So in order to leave the system in a known default configuration,
	 * always reset the GPU upon unload and suspend. Afterwards we then
	 * clean up the GEM state tracking, flushing off the requests and
	 * leaving the system in a known idle state.
	 *
	 * Note that it is of the utmost importance that the GPU is idle and
	 * all stray writes are flushed *before* we dismantle the backing
	 * storage for the pinned objects.
	 *
	 * However, since we are uncertain that resetting the GPU on older
	 * machines is a good idea, we don't - just in case it leaves the
	 * machine in an unusable condition.
	 */
	i915_gem_sanitize(dev_priv);

	intel_runtime_pm_put(dev_priv);
	return 0;

err_unlock:
	mutex_unlock(&dev->struct_mutex);
	intel_runtime_pm_put(dev_priv);
	return ret;
}

void i915_gem_resume(struct drm_i915_private *i915)
{
	WARN_ON(i915->gt.awake);

	mutex_lock(&i915->drm.struct_mutex);
	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	i915_gem_restore_gtt_mappings(i915);
	i915_gem_restore_fences(i915);

	/* As we didn't flush the kernel context before suspend, we cannot
	 * guarantee that the context image is complete. So let's just reset
	 * it and start again.
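	 *
	 * (If re-initialising the hardware or switching back to the kernel
	 * context fails below, we drop to err_wedged and mark the GPU as
	 * wedged rather than carry on with stale state.)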
4849 */ 4850 i915->gt.resume(i915); 4851 4852 if (i915_gem_init_hw(i915)) 4853 goto err_wedged; 4854 4855 intel_guc_resume(i915); 4856 4857 /* Always reload a context for powersaving. */ 4858 if (i915_gem_switch_to_kernel_context(i915)) 4859 goto err_wedged; 4860 4861 out_unlock: 4862 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 4863 mutex_unlock(&i915->drm.struct_mutex); 4864 return; 4865 4866 err_wedged: 4867 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 4868 i915_gem_set_wedged(i915); 4869 goto out_unlock; 4870 } 4871 4872 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 4873 { 4874 if (INTEL_GEN(dev_priv) < 5 || 4875 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4876 return; 4877 4878 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4879 DISP_TILE_SURFACE_SWIZZLING); 4880 4881 if (IS_GEN5(dev_priv)) 4882 return; 4883 4884 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4885 if (IS_GEN6(dev_priv)) 4886 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4887 else if (IS_GEN7(dev_priv)) 4888 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4889 else if (IS_GEN8(dev_priv)) 4890 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4891 else 4892 BUG(); 4893 } 4894 4895 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 4896 { 4897 I915_WRITE(RING_CTL(base), 0); 4898 I915_WRITE(RING_HEAD(base), 0); 4899 I915_WRITE(RING_TAIL(base), 0); 4900 I915_WRITE(RING_START(base), 0); 4901 } 4902 4903 static void init_unused_rings(struct drm_i915_private *dev_priv) 4904 { 4905 if (IS_I830(dev_priv)) { 4906 init_unused_ring(dev_priv, PRB1_BASE); 4907 init_unused_ring(dev_priv, SRB0_BASE); 4908 init_unused_ring(dev_priv, SRB1_BASE); 4909 init_unused_ring(dev_priv, SRB2_BASE); 4910 init_unused_ring(dev_priv, SRB3_BASE); 4911 } else if (IS_GEN2(dev_priv)) { 4912 init_unused_ring(dev_priv, SRB0_BASE); 4913 init_unused_ring(dev_priv, SRB1_BASE); 4914 } else if (IS_GEN3(dev_priv)) { 4915 init_unused_ring(dev_priv, PRB1_BASE); 4916 init_unused_ring(dev_priv, PRB2_BASE); 4917 } 4918 } 4919 4920 static int __i915_gem_restart_engines(void *data) 4921 { 4922 struct drm_i915_private *i915 = data; 4923 struct intel_engine_cs *engine; 4924 enum intel_engine_id id; 4925 int err; 4926 4927 for_each_engine(engine, i915, id) { 4928 err = engine->init_hw(engine); 4929 if (err) 4930 return err; 4931 } 4932 4933 return 0; 4934 } 4935 4936 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 4937 { 4938 int ret; 4939 4940 dev_priv->gt.last_init_time = ktime_get(); 4941 4942 /* Double layer security blanket, see i915_gem_init() */ 4943 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4944 4945 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 4946 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4947 4948 if (IS_HASWELL(dev_priv)) 4949 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 4950 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4951 4952 if (HAS_PCH_NOP(dev_priv)) { 4953 if (IS_IVYBRIDGE(dev_priv)) { 4954 u32 temp = I915_READ(GEN7_MSG_CTL); 4955 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4956 I915_WRITE(GEN7_MSG_CTL, temp); 4957 } else if (INTEL_GEN(dev_priv) >= 7) { 4958 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4959 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4960 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4961 } 4962 } 4963 4964 i915_gem_init_swizzling(dev_priv); 4965 4966 /* 4967 * At least 830 can leave some of the unused rings 4968 * "active" (ie. 
head != tail) after resume which 4969 * will prevent c3 entry. Makes sure all unused rings 4970 * are totally idle. 4971 */ 4972 init_unused_rings(dev_priv); 4973 4974 BUG_ON(!dev_priv->kernel_context); 4975 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 4976 ret = -EIO; 4977 goto out; 4978 } 4979 4980 ret = i915_ppgtt_init_hw(dev_priv); 4981 if (ret) { 4982 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4983 goto out; 4984 } 4985 4986 /* We can't enable contexts until all firmware is loaded */ 4987 ret = intel_uc_init_hw(dev_priv); 4988 if (ret) 4989 goto out; 4990 4991 intel_mocs_init_l3cc_table(dev_priv); 4992 4993 /* Only when the HW is re-initialised, can we replay the requests */ 4994 ret = __i915_gem_restart_engines(dev_priv); 4995 out: 4996 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4997 return ret; 4998 } 4999 5000 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) 5001 { 5002 if (INTEL_INFO(dev_priv)->gen < 6) 5003 return false; 5004 5005 /* TODO: make semaphores and Execlists play nicely together */ 5006 if (i915_modparams.enable_execlists) 5007 return false; 5008 5009 if (value >= 0) 5010 return value; 5011 5012 /* Enable semaphores on SNB when IO remapping is off */ 5013 if (IS_GEN6(dev_priv) && intel_vtd_active()) 5014 return false; 5015 5016 return true; 5017 } 5018 5019 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5020 { 5021 struct i915_gem_context *ctx; 5022 struct intel_engine_cs *engine; 5023 enum intel_engine_id id; 5024 int err; 5025 5026 /* 5027 * As we reset the gpu during very early sanitisation, the current 5028 * register state on the GPU should reflect its defaults values. 5029 * We load a context onto the hw (with restore-inhibit), then switch 5030 * over to a second context to save that default register state. We 5031 * can then prime every new context with that state so they all start 5032 * from the same default HW values. 5033 */ 5034 5035 ctx = i915_gem_context_create_kernel(i915, 0); 5036 if (IS_ERR(ctx)) 5037 return PTR_ERR(ctx); 5038 5039 for_each_engine(engine, i915, id) { 5040 struct drm_i915_gem_request *rq; 5041 5042 rq = i915_gem_request_alloc(engine, ctx); 5043 if (IS_ERR(rq)) { 5044 err = PTR_ERR(rq); 5045 goto out_ctx; 5046 } 5047 5048 err = i915_switch_context(rq); 5049 if (engine->init_context) 5050 err = engine->init_context(rq); 5051 5052 __i915_add_request(rq, true); 5053 if (err) 5054 goto err_active; 5055 } 5056 5057 err = i915_gem_switch_to_kernel_context(i915); 5058 if (err) 5059 goto err_active; 5060 5061 err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); 5062 if (err) 5063 goto err_active; 5064 5065 assert_kernel_context_is_current(i915); 5066 5067 for_each_engine(engine, i915, id) { 5068 struct i915_vma *state; 5069 5070 state = ctx->engine[id].state; 5071 if (!state) 5072 continue; 5073 5074 /* 5075 * As we will hold a reference to the logical state, it will 5076 * not be torn down with the context, and importantly the 5077 * object will hold onto its vma (making it possible for a 5078 * stray GTT write to corrupt our defaults). Unmap the vma 5079 * from the GTT to prevent such accidents and reclaim the 5080 * space. 
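 *
 * (A sketch of what follows: unbind the context image from the GGTT,
 *  move it to the CPU domain so any outstanding GPU writes are
 *  flushed, then keep a reference to the object as
 *  engine->default_state for priming future contexts.)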
5081 */ 5082 err = i915_vma_unbind(state); 5083 if (err) 5084 goto err_active; 5085 5086 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5087 if (err) 5088 goto err_active; 5089 5090 engine->default_state = i915_gem_object_get(state->obj); 5091 } 5092 5093 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5094 unsigned int found = intel_engines_has_context_isolation(i915); 5095 5096 /* 5097 * Make sure that classes with multiple engine instances all 5098 * share the same basic configuration. 5099 */ 5100 for_each_engine(engine, i915, id) { 5101 unsigned int bit = BIT(engine->uabi_class); 5102 unsigned int expected = engine->default_state ? bit : 0; 5103 5104 if ((found & bit) != expected) { 5105 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5106 engine->uabi_class, engine->name); 5107 } 5108 } 5109 } 5110 5111 out_ctx: 5112 i915_gem_context_set_closed(ctx); 5113 i915_gem_context_put(ctx); 5114 return err; 5115 5116 err_active: 5117 /* 5118 * If we have to abandon now, we expect the engines to be idle 5119 * and ready to be torn-down. First try to flush any remaining 5120 * request, ensure we are pointing at the kernel context and 5121 * then remove it. 5122 */ 5123 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5124 goto out_ctx; 5125 5126 if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) 5127 goto out_ctx; 5128 5129 i915_gem_contexts_lost(i915); 5130 goto out_ctx; 5131 } 5132 5133 int i915_gem_init(struct drm_i915_private *dev_priv) 5134 { 5135 int ret; 5136 5137 mutex_lock(&dev_priv->drm.struct_mutex); 5138 5139 /* 5140 * We need to fallback to 4K pages since gvt gtt handling doesn't 5141 * support huge page entries - we will need to check either hypervisor 5142 * mm can support huge guest page or just do emulation in gvt. 5143 */ 5144 if (intel_vgpu_active(dev_priv)) 5145 mkwrite_device_info(dev_priv)->page_sizes = 5146 I915_GTT_PAGE_SIZE_4K; 5147 5148 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5149 5150 if (!i915_modparams.enable_execlists) { 5151 dev_priv->gt.resume = intel_legacy_submission_resume; 5152 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5153 } else { 5154 dev_priv->gt.resume = intel_lr_context_resume; 5155 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5156 } 5157 5158 /* This is just a security blanket to placate dragons. 5159 * On some systems, we very sporadically observe that the first TLBs 5160 * used by the CS may be stale, despite us poking the TLB reset. If 5161 * we hold the forcewake during initialisation these problems 5162 * just magically go away. 5163 */ 5164 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5165 5166 ret = i915_gem_init_userptr(dev_priv); 5167 if (ret) 5168 goto out_unlock; 5169 5170 ret = i915_gem_init_ggtt(dev_priv); 5171 if (ret) 5172 goto out_unlock; 5173 5174 ret = i915_gem_contexts_init(dev_priv); 5175 if (ret) 5176 goto out_unlock; 5177 5178 ret = intel_engines_init(dev_priv); 5179 if (ret) 5180 goto out_unlock; 5181 5182 intel_init_gt_powersave(dev_priv); 5183 5184 ret = i915_gem_init_hw(dev_priv); 5185 if (ret) 5186 goto out_unlock; 5187 5188 /* 5189 * Despite its name intel_init_clock_gating applies both display 5190 * clock gating workarounds; GT mmio workarounds and the occasional 5191 * GT power context workaround. Worse, sometimes it includes a context 5192 * register workaround which we need to apply before we record the 5193 * default HW state for all contexts. 
5194 * 5195 * FIXME: break up the workarounds and apply them at the right time! 5196 */ 5197 intel_init_clock_gating(dev_priv); 5198 5199 ret = __intel_engines_record_defaults(dev_priv); 5200 out_unlock: 5201 if (ret == -EIO) { 5202 /* Allow engine initialisation to fail by marking the GPU as 5203 * wedged. But we only want to do this where the GPU is angry, 5204 * for all other failure, such as an allocation failure, bail. 5205 */ 5206 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5207 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5208 i915_gem_set_wedged(dev_priv); 5209 } 5210 ret = 0; 5211 } 5212 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5213 mutex_unlock(&dev_priv->drm.struct_mutex); 5214 5215 return ret; 5216 } 5217 5218 void i915_gem_init_mmio(struct drm_i915_private *i915) 5219 { 5220 i915_gem_sanitize(i915); 5221 } 5222 5223 void 5224 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5225 { 5226 struct intel_engine_cs *engine; 5227 enum intel_engine_id id; 5228 5229 for_each_engine(engine, dev_priv, id) 5230 dev_priv->gt.cleanup_engine(engine); 5231 } 5232 5233 void 5234 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5235 { 5236 int i; 5237 5238 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5239 !IS_CHERRYVIEW(dev_priv)) 5240 dev_priv->num_fence_regs = 32; 5241 else if (INTEL_INFO(dev_priv)->gen >= 4 || 5242 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5243 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5244 dev_priv->num_fence_regs = 16; 5245 else 5246 dev_priv->num_fence_regs = 8; 5247 5248 if (intel_vgpu_active(dev_priv)) 5249 dev_priv->num_fence_regs = 5250 I915_READ(vgtif_reg(avail_rs.fence_num)); 5251 5252 /* Initialize fence registers to zero */ 5253 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5254 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5255 5256 fence->i915 = dev_priv; 5257 fence->id = i; 5258 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5259 } 5260 i915_gem_restore_fences(dev_priv); 5261 5262 i915_gem_detect_bit_6_swizzle(dev_priv); 5263 } 5264 5265 static void i915_gem_init__mm(struct drm_i915_private *i915) 5266 { 5267 spin_lock_init(&i915->mm.object_stat_lock); 5268 spin_lock_init(&i915->mm.obj_lock); 5269 spin_lock_init(&i915->mm.free_lock); 5270 5271 init_llist_head(&i915->mm.free_list); 5272 5273 INIT_LIST_HEAD(&i915->mm.unbound_list); 5274 INIT_LIST_HEAD(&i915->mm.bound_list); 5275 INIT_LIST_HEAD(&i915->mm.fence_list); 5276 INIT_LIST_HEAD(&i915->mm.userfault_list); 5277 5278 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5279 } 5280 5281 int 5282 i915_gem_load_init(struct drm_i915_private *dev_priv) 5283 { 5284 int err = -ENOMEM; 5285 5286 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5287 if (!dev_priv->objects) 5288 goto err_out; 5289 5290 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5291 if (!dev_priv->vmas) 5292 goto err_objects; 5293 5294 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5295 if (!dev_priv->luts) 5296 goto err_vmas; 5297 5298 dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, 5299 SLAB_HWCACHE_ALIGN | 5300 SLAB_RECLAIM_ACCOUNT | 5301 SLAB_TYPESAFE_BY_RCU); 5302 if (!dev_priv->requests) 5303 goto err_luts; 5304 5305 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5306 SLAB_HWCACHE_ALIGN | 5307 SLAB_RECLAIM_ACCOUNT); 5308 if (!dev_priv->dependencies) 5309 goto err_requests; 5310 5311 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5312 if (!dev_priv->priorities) 
5313 goto err_dependencies; 5314 5315 mutex_lock(&dev_priv->drm.struct_mutex); 5316 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5317 err = i915_gem_timeline_init__global(dev_priv); 5318 mutex_unlock(&dev_priv->drm.struct_mutex); 5319 if (err) 5320 goto err_priorities; 5321 5322 i915_gem_init__mm(dev_priv); 5323 5324 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5325 i915_gem_retire_work_handler); 5326 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5327 i915_gem_idle_work_handler); 5328 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5329 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5330 5331 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5332 5333 spin_lock_init(&dev_priv->fb_tracking.lock); 5334 5335 err = i915_gemfs_init(dev_priv); 5336 if (err) 5337 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5338 5339 return 0; 5340 5341 err_priorities: 5342 kmem_cache_destroy(dev_priv->priorities); 5343 err_dependencies: 5344 kmem_cache_destroy(dev_priv->dependencies); 5345 err_requests: 5346 kmem_cache_destroy(dev_priv->requests); 5347 err_luts: 5348 kmem_cache_destroy(dev_priv->luts); 5349 err_vmas: 5350 kmem_cache_destroy(dev_priv->vmas); 5351 err_objects: 5352 kmem_cache_destroy(dev_priv->objects); 5353 err_out: 5354 return err; 5355 } 5356 5357 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) 5358 { 5359 i915_gem_drain_freed_objects(dev_priv); 5360 WARN_ON(!llist_empty(&dev_priv->mm.free_list)); 5361 WARN_ON(dev_priv->mm.object_count); 5362 5363 mutex_lock(&dev_priv->drm.struct_mutex); 5364 i915_gem_timeline_fini(&dev_priv->gt.global_timeline); 5365 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5366 mutex_unlock(&dev_priv->drm.struct_mutex); 5367 5368 kmem_cache_destroy(dev_priv->priorities); 5369 kmem_cache_destroy(dev_priv->dependencies); 5370 kmem_cache_destroy(dev_priv->requests); 5371 kmem_cache_destroy(dev_priv->luts); 5372 kmem_cache_destroy(dev_priv->vmas); 5373 kmem_cache_destroy(dev_priv->objects); 5374 5375 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5376 rcu_barrier(); 5377 5378 i915_gemfs_fini(dev_priv); 5379 } 5380 5381 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5382 { 5383 /* Discard all purgeable objects, let userspace recover those as 5384 * required after resuming. 5385 */ 5386 i915_gem_shrink_all(dev_priv); 5387 5388 return 0; 5389 } 5390 5391 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5392 { 5393 struct drm_i915_gem_object *obj; 5394 struct list_head *phases[] = { 5395 &dev_priv->mm.unbound_list, 5396 &dev_priv->mm.bound_list, 5397 NULL 5398 }, **p; 5399 5400 /* Called just before we write the hibernation image. 5401 * 5402 * We need to update the domain tracking to reflect that the CPU 5403 * will be accessing all the pages to create and restore from the 5404 * hibernation, and so upon restoration those pages will be in the 5405 * CPU domain. 5406 * 5407 * To make sure the hibernation image contains the latest state, 5408 * we update that state just before writing out the image. 
5409 * 5410 * To try and reduce the hibernation image, we manually shrink 5411 * the objects as well, see i915_gem_freeze() 5412 */ 5413 5414 i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); 5415 i915_gem_drain_freed_objects(dev_priv); 5416 5417 spin_lock(&dev_priv->mm.obj_lock); 5418 for (p = phases; *p; p++) { 5419 list_for_each_entry(obj, *p, mm.link) 5420 __start_cpu_write(obj); 5421 } 5422 spin_unlock(&dev_priv->mm.obj_lock); 5423 5424 return 0; 5425 } 5426 5427 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5428 { 5429 struct drm_i915_file_private *file_priv = file->driver_priv; 5430 struct drm_i915_gem_request *request; 5431 5432 /* Clean up our request list when the client is going away, so that 5433 * later retire_requests won't dereference our soon-to-be-gone 5434 * file_priv. 5435 */ 5436 spin_lock(&file_priv->mm.lock); 5437 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 5438 request->file_priv = NULL; 5439 spin_unlock(&file_priv->mm.lock); 5440 } 5441 5442 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 5443 { 5444 struct drm_i915_file_private *file_priv; 5445 int ret; 5446 5447 DRM_DEBUG("\n"); 5448 5449 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5450 if (!file_priv) 5451 return -ENOMEM; 5452 5453 file->driver_priv = file_priv; 5454 file_priv->dev_priv = i915; 5455 file_priv->file = file; 5456 5457 spin_lock_init(&file_priv->mm.lock); 5458 INIT_LIST_HEAD(&file_priv->mm.request_list); 5459 5460 file_priv->bsd_engine = -1; 5461 5462 ret = i915_gem_context_open(i915, file); 5463 if (ret) 5464 kfree(file_priv); 5465 5466 return ret; 5467 } 5468 5469 /** 5470 * i915_gem_track_fb - update frontbuffer tracking 5471 * @old: current GEM buffer for the frontbuffer slots 5472 * @new: new GEM buffer for the frontbuffer slots 5473 * @frontbuffer_bits: bitmask of frontbuffer slots 5474 * 5475 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5476 * from @old and setting them in @new. Both @old and @new can be NULL. 5477 */ 5478 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5479 struct drm_i915_gem_object *new, 5480 unsigned frontbuffer_bits) 5481 { 5482 /* Control of individual bits within the mask are guarded by 5483 * the owning plane->mutex, i.e. we can never see concurrent 5484 * manipulation of individual bits. But since the bitfield as a whole 5485 * is updated using RMW, we need to use atomics in order to update 5486 * the bits. 
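 *
 * (A typical caller is the plane update path: on a flip it passes the
 *  outgoing and incoming framebuffer objects together with that
 *  plane's frontbuffer bit, so the bit migrates atomically from
 *  old->frontbuffer_bits to new->frontbuffer_bits.)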
5487 */ 5488 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5489 sizeof(atomic_t) * BITS_PER_BYTE); 5490 5491 if (old) { 5492 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5493 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5494 } 5495 5496 if (new) { 5497 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5498 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5499 } 5500 } 5501 5502 /* Allocate a new GEM object and fill it with the supplied data */ 5503 struct drm_i915_gem_object * 5504 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5505 const void *data, size_t size) 5506 { 5507 struct drm_i915_gem_object *obj; 5508 struct file *file; 5509 size_t offset; 5510 int err; 5511 5512 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5513 if (IS_ERR(obj)) 5514 return obj; 5515 5516 GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 5517 5518 file = obj->base.filp; 5519 offset = 0; 5520 do { 5521 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5522 struct page *page; 5523 void *pgdata, *vaddr; 5524 5525 err = pagecache_write_begin(file, file->f_mapping, 5526 offset, len, 0, 5527 &page, &pgdata); 5528 if (err < 0) 5529 goto fail; 5530 5531 vaddr = kmap(page); 5532 memcpy(vaddr, data, len); 5533 kunmap(page); 5534 5535 err = pagecache_write_end(file, file->f_mapping, 5536 offset, len, len, 5537 page, pgdata); 5538 if (err < 0) 5539 goto fail; 5540 5541 size -= len; 5542 data += len; 5543 offset += len; 5544 } while (size); 5545 5546 return obj; 5547 5548 fail: 5549 i915_gem_object_put(obj); 5550 return ERR_PTR(err); 5551 } 5552 5553 struct scatterlist * 5554 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5555 unsigned int n, 5556 unsigned int *offset) 5557 { 5558 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 5559 struct scatterlist *sg; 5560 unsigned int idx, count; 5561 5562 might_sleep(); 5563 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 5564 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 5565 5566 /* As we iterate forward through the sg, we record each entry in a 5567 * radixtree for quick repeated (backwards) lookups. If we have seen 5568 * this index previously, we will have an entry for it. 5569 * 5570 * Initial lookup is O(N), but this is amortized to O(1) for 5571 * sequential page access (where each new request is consecutive 5572 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 5573 * i.e. O(1) with a large constant! 5574 */ 5575 if (n < READ_ONCE(iter->sg_idx)) 5576 goto lookup; 5577 5578 mutex_lock(&iter->lock); 5579 5580 /* We prefer to reuse the last sg so that repeated lookup of this 5581 * (or the subsequent) sg are fast - comparing against the last 5582 * sg is faster than going through the radixtree. 5583 */ 5584 5585 sg = iter->sg_pos; 5586 idx = iter->sg_idx; 5587 count = __sg_page_count(sg); 5588 5589 while (idx + count <= n) { 5590 unsigned long exception, i; 5591 int ret; 5592 5593 /* If we cannot allocate and insert this entry, or the 5594 * individual pages from this range, cancel updating the 5595 * sg_idx so that on this lookup we are forced to linearly 5596 * scan onwards, but on future lookups we will try the 5597 * insertion again (in which case we need to be careful of 5598 * the error return reporting that we have already inserted 5599 * this index). 
5600 */ 5601 ret = radix_tree_insert(&iter->radix, idx, sg); 5602 if (ret && ret != -EEXIST) 5603 goto scan; 5604 5605 exception = 5606 RADIX_TREE_EXCEPTIONAL_ENTRY | 5607 idx << RADIX_TREE_EXCEPTIONAL_SHIFT; 5608 for (i = 1; i < count; i++) { 5609 ret = radix_tree_insert(&iter->radix, idx + i, 5610 (void *)exception); 5611 if (ret && ret != -EEXIST) 5612 goto scan; 5613 } 5614 5615 idx += count; 5616 sg = ____sg_next(sg); 5617 count = __sg_page_count(sg); 5618 } 5619 5620 scan: 5621 iter->sg_pos = sg; 5622 iter->sg_idx = idx; 5623 5624 mutex_unlock(&iter->lock); 5625 5626 if (unlikely(n < idx)) /* insertion completed by another thread */ 5627 goto lookup; 5628 5629 /* In case we failed to insert the entry into the radixtree, we need 5630 * to look beyond the current sg. 5631 */ 5632 while (idx + count <= n) { 5633 idx += count; 5634 sg = ____sg_next(sg); 5635 count = __sg_page_count(sg); 5636 } 5637 5638 *offset = n - idx; 5639 return sg; 5640 5641 lookup: 5642 rcu_read_lock(); 5643 5644 sg = radix_tree_lookup(&iter->radix, n); 5645 GEM_BUG_ON(!sg); 5646 5647 /* If this index is in the middle of multi-page sg entry, 5648 * the radixtree will contain an exceptional entry that points 5649 * to the start of that range. We will return the pointer to 5650 * the base page and the offset of this page within the 5651 * sg entry's range. 5652 */ 5653 *offset = 0; 5654 if (unlikely(radix_tree_exception(sg))) { 5655 unsigned long base = 5656 (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; 5657 5658 sg = radix_tree_lookup(&iter->radix, base); 5659 GEM_BUG_ON(!sg); 5660 5661 *offset = n - base; 5662 } 5663 5664 rcu_read_unlock(); 5665 5666 return sg; 5667 } 5668 5669 struct page * 5670 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5671 { 5672 struct scatterlist *sg; 5673 unsigned int offset; 5674 5675 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5676 5677 sg = i915_gem_object_get_sg(obj, n, &offset); 5678 return nth_page(sg_page(sg), offset); 5679 } 5680 5681 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5682 struct page * 5683 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5684 unsigned int n) 5685 { 5686 struct page *page; 5687 5688 page = i915_gem_object_get_page(obj, n); 5689 if (!obj->mm.dirty) 5690 set_page_dirty(page); 5691 5692 return page; 5693 } 5694 5695 dma_addr_t 5696 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5697 unsigned long n) 5698 { 5699 struct scatterlist *sg; 5700 unsigned int offset; 5701 5702 sg = i915_gem_object_get_sg(obj, n, &offset); 5703 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5704 } 5705 5706 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) 5707 { 5708 struct sg_table *pages; 5709 int err; 5710 5711 if (align > obj->base.size) 5712 return -EINVAL; 5713 5714 if (obj->ops == &i915_gem_phys_ops) 5715 return 0; 5716 5717 if (obj->ops != &i915_gem_object_ops) 5718 return -EINVAL; 5719 5720 err = i915_gem_object_unbind(obj); 5721 if (err) 5722 return err; 5723 5724 mutex_lock(&obj->mm.lock); 5725 5726 if (obj->mm.madv != I915_MADV_WILLNEED) { 5727 err = -EFAULT; 5728 goto err_unlock; 5729 } 5730 5731 if (obj->mm.quirked) { 5732 err = -EFAULT; 5733 goto err_unlock; 5734 } 5735 5736 if (obj->mm.mapping) { 5737 err = -EBUSY; 5738 goto err_unlock; 5739 } 5740 5741 pages = fetch_and_zero(&obj->mm.pages); 5742 if (pages) { 5743 struct drm_i915_private *i915 = to_i915(obj->base.dev); 5744 5745 __i915_gem_object_reset_page_iter(obj); 5746 5747 
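		/* The object is switching away from its shmemfs backing:
		 * having taken ownership of the old pages above, drop the
		 * object from the bound/unbound bookkeeping before swapping
		 * in the phys ops below.
		 */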
spin_lock(&i915->mm.obj_lock); 5748 list_del(&obj->mm.link); 5749 spin_unlock(&i915->mm.obj_lock); 5750 } 5751 5752 obj->ops = &i915_gem_phys_ops; 5753 5754 err = ____i915_gem_object_get_pages(obj); 5755 if (err) 5756 goto err_xfer; 5757 5758 /* Perma-pin (until release) the physical set of pages */ 5759 __i915_gem_object_pin_pages(obj); 5760 5761 if (!IS_ERR_OR_NULL(pages)) 5762 i915_gem_object_ops.put_pages(obj, pages); 5763 mutex_unlock(&obj->mm.lock); 5764 return 0; 5765 5766 err_xfer: 5767 obj->ops = &i915_gem_object_ops; 5768 obj->mm.pages = pages; 5769 err_unlock: 5770 mutex_unlock(&obj->mm.lock); 5771 return err; 5772 } 5773 5774 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5775 #include "selftests/scatterlist.c" 5776 #include "selftests/mock_gem_device.c" 5777 #include "selftests/huge_gem_object.c" 5778 #include "selftests/huge_pages.c" 5779 #include "selftests/i915_gem_object.c" 5780 #include "selftests/i915_gem_coherency.c" 5781 #endif 5782