/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj) {
                        if (i915_vma_unset_ggtt_write(vma))
                                intel_gt_flush_ggtt_writes(vma->vm->gt);
                }
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}
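/*
 * Usage sketch (illustrative only, not a call site in this driver): a caller
 * that has dirtied a framebuffer object through a CPU mapping and needs the
 * scanout to observe the data can use the helper above; vaddr, src and size
 * are hypothetical names here.
 *
 *      memcpy(vaddr, src, size);
 *      i915_gem_object_flush_if_display(obj);
 *
 * i915_gem_object_flush_if_display_locked() is the variant for callers that
 * already hold the object lock.
 */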
/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}
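/*
 * Usage sketch (illustrative; assumes the caller has already looked up obj
 * and writes through a GGTT mapping afterwards): domain changes run under
 * the object lock, and write = true marks the pages dirty and arms the GGTT
 * write tracking that flush_write_domain() above tears down.
 *
 *      err = i915_gem_object_lock_interruptible(obj, NULL);
 *      if (err)
 *              return err;
 *      err = i915_gem_object_set_to_gtt_domain(obj, true);
 *      i915_gem_object_unlock(obj);
 */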
/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        if (obj->cache_level == cache_level)
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        if (obj->cache_level != cache_level) {
                i915_gem_object_set_cache_coherency(obj, cache_level);
                obj->cache_dirty = true;
        }

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the Vulkan driver
                 * sets all objects to cached.
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}
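/*
 * Userspace view (sketch; fd and handle are assumed to be a valid DRM fd and
 * GEM handle, drmIoctl() comes from libdrm): the caching ioctls above are
 * driven from code along these lines, with -ENODEV reported on platforms
 * that have neither LLC nor snooping.
 *
 *      struct drm_i915_gem_caching arg = {
 *              .handle = handle,
 *              .caching = I915_CACHING_CACHED,
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *              return -errno;
 */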
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM (no migration yet) */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However, for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}
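/*
 * Usage sketch (illustrative; the real callers live in the display code, and
 * alignment/view/flags are placeholders): because the pin can fail with
 * -EDEADLK as part of a ww transaction, it is wrapped in the usual backoff
 * loop.
 *
 *      struct i915_gem_ww_ctx ww;
 *      struct i915_vma *vma;
 *      int err;
 *
 *      i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *      err = i915_gem_object_lock(obj, &ww);
 *      if (!err) {
 *              vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *                                                         view, flags);
 *              if (IS_ERR(vma))
 *                      err = PTR_ERR(vma);
 *      }
 *      if (err == -EDEADLK) {
 *              err = i915_gem_ww_ctx_backoff(&ww);
 *              if (!err)
 *                      goto retry;
 *      }
 *      i915_gem_ww_ctx_fini(&ww);
 */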
/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages; iris uses set_domain to check
                 * userptr validity.
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}
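/*
 * Userspace view (sketch; fd and handle are assumed valid, drmIoctl() comes
 * from libdrm): before touching a buffer through a CPU mmap, userspace
 * typically issues
 *
 *      struct drm_i915_gem_set_domain arg = {
 *              .handle = handle,
 *              .read_domains = I915_GEM_DOMAIN_CPU,
 *              .write_domain = I915_GEM_DOMAIN_CPU,
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
 *              return -errno;
 *
 * which waits for outstanding GPU work and performs the domain transition
 * implemented above.
 */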
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourselves into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}
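/*
 * Usage sketch (illustrative; a simplified pread-style copy with error
 * handling and offsets omitted, page/vaddr/user_ptr/n are placeholders):
 * the caller holds the object lock, honours needs_clflush and drops the
 * page pin when done.
 *
 *      err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *      if (err)
 *              return err;
 *
 *      page = i915_gem_object_get_page(obj, n);
 *      vaddr = kmap(page);
 *      if (needs_clflush & CLFLUSH_BEFORE)
 *              drm_clflush_virt_range(vaddr, PAGE_SIZE);
 *      err = copy_to_user(user_ptr, vaddr, PAGE_SIZE);
 *      kunmap(page);
 *
 *      i915_gem_object_unpin_pages(obj);
 */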
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourselves into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}
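/*
 * Usage sketch (illustrative; a simplified pwrite-style copy with error
 * handling omitted, page/vaddr/user_ptr/n are placeholders): CLFLUSH_BEFORE
 * invalidates stale cachelines before a partial write, CLFLUSH_AFTER pushes
 * the new data out of the CPU cache for the GPU.
 *
 *      err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *      if (err)
 *              return err;
 *
 *      page = i915_gem_object_get_page(obj, n);
 *      vaddr = kmap(page);
 *      if (needs_clflush & CLFLUSH_BEFORE)
 *              drm_clflush_virt_range(vaddr, PAGE_SIZE);
 *      err = copy_from_user(vaddr, user_ptr, PAGE_SIZE);
 *      if (needs_clflush & CLFLUSH_AFTER)
 *              drm_clflush_virt_range(vaddr, PAGE_SIZE);
 *      kunmap(page);
 *
 *      i915_gem_object_unpin_pages(obj);
 */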