/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj) {
                        if (i915_vma_unset_ggtt_write(vma))
                                intel_gt_flush_ggtt_writes(vma->vm->gt);
                }
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

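/*
 * Illustrative sketch only (not a caller in this file): the set-to-domain
 * helpers above require the object lock to already be held, as documented
 * and enforced by assert_object_held(), so a typical user of an assumed
 * "obj" looks roughly like:
 *
 *      i915_gem_object_lock(obj, NULL);
 *      err = i915_gem_object_set_to_gtt_domain(obj, true);
 *      i915_gem_object_unlock(obj);
 */
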
/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        if (obj->cache_level == cache_level)
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        if (obj->cache_level != cache_level) {
                i915_gem_object_set_cache_coherency(obj, cache_level);
                obj->cache_dirty = true;
        }

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        if (IS_DGFX(i915))
                return -ENODEV;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of proxy object is handled by its generator, and
         * not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the vulkan driver
                 * sets all objects to cached
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is lowest common denominator for all
         * chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}

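/*
 * Illustrative sketch of an assumed caller (not part of this file): pinning
 * for scanout is normally driven from a ww transaction, so that -EDEADLK
 * from the locking and pinning above can be backed off and retried, e.g.
 *
 *      struct i915_gem_ww_ctx ww;
 *      struct i915_vma *vma;
 *      int err;
 *
 *      for_i915_gem_ww(&ww, err, true) {
 *              err = i915_gem_object_lock(obj, &ww);
 *              if (err)
 *                      continue;
 *
 *              vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *                                                         view, flags);
 *              if (IS_ERR(vma))
 *                      err = PTR_ERR(vma);
 *      }
 *
 * where "obj", "alignment", "view" and "flags" are whatever the display
 * code has at hand.
 */
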
/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages, iris uses set_domain to check
                 * userptr validity
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}

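/*
 * For reference, a hedged sketch of the userspace side (illustrative only,
 * not part of this file): the ioctl takes a GEM handle plus the desired
 * read/write domains from the uapi struct, issued in libdrm style as
 *
 *      struct drm_i915_gem_set_domain arg = {
 *              .handle = handle,
 *              .read_domains = I915_GEM_DOMAIN_CPU,
 *              .write_domain = I915_GEM_DOMAIN_CPU,
 *      };
 *
 *      drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * where "handle" and "fd" are the caller's GEM handle and device fd.
 */
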
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

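/*
 * Minimal caller sketch (assumed pread-style user, not taken from this
 * file): with the object lock held, the pages stay pinned on success and
 * needs_clflush tells the caller whether to flush before reading, e.g.
 *
 *      unsigned int needs_clflush;
 *      int err;
 *
 *      err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *      if (err)
 *              return err;
 *
 *      if (needs_clflush & CLFLUSH_BEFORE)
 *              drm_clflush_sg(obj->mm.pages);
 *      ... read from the object's pages ...
 *
 *      i915_gem_object_unpin_pages(obj);
 */
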
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}