/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
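
/*
 * Illustrative sketch only, not an in-tree caller: the set_to_*_domain()
 * helpers assert that the object lock is already held, so a hypothetical
 * user would wrap them as
 *
 *	i915_gem_object_lock(obj);
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * i915_gem_set_domain_ioctl() below follows this pattern, taking the
 * interruptible lock and pinning the pages around the call.
 */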
/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
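
/*
 * Note on the bookkeeping shared by the set_to_*_domain() helpers above and
 * below: after waiting for outstanding work and flushing the old write
 * domain, the new domain is folded into obj->read_domains; a write request
 * makes that domain exclusive (read_domains = write_domain = NEW) and marks
 * the pages dirty. The GTT path additionally tags every bound GGTT vma with
 * i915_vma_set_ggtt_write() so that writes through the aperture's
 * write-combining mapping can be flushed when GTT write access is later
 * dropped.
 */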
/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}
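
/*
 * Summary of the mapping implemented by the two caching ioctls above
 * (derived from their switch statements, not an independent table):
 *
 *	I915_CACHING_NONE    <-> I915_CACHE_NONE
 *	I915_CACHING_CACHED  <-> I915_CACHE_LLC (L3_LLC is also reported
 *				 as CACHED; setting CACHED needs LLC/snoop)
 *	I915_CACHING_DISPLAY <-> I915_CACHE_WT (or NONE without HAS_WT)
 */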
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into the mappable aperture, in
	 * which case put it anyway and hope that userspace can cope (but
	 * always first try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (list_empty(&obj->vma.list))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(vma->obj);

	i915_vma_unpin(vma);
}
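
/*
 * Illustrative pairing only (the display code is the real user): a scanout
 * caller is expected to balance the pin/unpin above, roughly
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, align, view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	...scan out from vma...
 *	i915_gem_object_unpin_from_display_plane(vma);
 *
 * and, as noted above i915_gem_object_pin_to_display_plane(), it remains
 * responsible for its own frontbuffer flush.
 */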
/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
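
/*
 * Userspace-side sketch (assumed libdrm-style usage, not kernel code): the
 * ioctl above is driven with struct drm_i915_gem_set_domain from the uapi
 * header, e.g.
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_WC,
 *		.write_domain = I915_GEM_DOMAIN_WC,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * Only the CPU, GTT and WC domains are accepted; GPU domains are rejected
 * with -EINVAL by the check at the top of the function.
 */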
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
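
/*
 * Caller sketch for the prepare helpers above (a minimal sketch, assuming
 * the usual i915_gem_object_finish_access() counterpart that unpins the
 * pages and drops the object lock): pread/pwrite-style users bracket their
 * CPU access as
 *
 *	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	...clflush before/after the copy as directed by the
 *	   CLFLUSH_BEFORE / CLFLUSH_AFTER bits in needs_clflush...
 *	i915_gem_object_finish_access(obj);
 */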