/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
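
/*
 * Illustrative sketch only (not part of the original file): a minimal
 * example of how a caller might use i915_gem_object_set_to_wc_domain()
 * before CPU writes through a WC mapping. The helper name and the plain
 * (non-ww) locking are assumptions for illustration; real callers may
 * instead hold the object lock as part of a larger ww transaction.
 */
static int __maybe_unused example_set_wc_for_write(struct drm_i915_gem_object *obj)
{
	int err;

	/* The domain helpers assert that the object lock is held. */
	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		return err;

	/* Waits for the GPU, flushes other write domains, updates tracking. */
	err = i915_gem_object_set_to_wc_domain(obj, true);

	i915_gem_object_unlock(obj);
	return err;
}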

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
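
/*
 * Illustrative sketch only (not part of the original file): the same kind
 * of move, here into the GTT domain, performed under a ww acquire context
 * with the -EDEADLK backoff dance, mirroring the pattern used by
 * i915_gem_object_pin_to_display_plane() below. The helper name is an
 * assumption for illustration.
 */
static int __maybe_unused example_set_gtt_for_write(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, true);
	if (err == -EDEADLK) {
		/* Drop all held locks and retry the whole transaction. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	return err;
}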

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
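
/*
 * Illustrative userspace usage (not part of this file): the two caching
 * ioctls above are driven from userspace roughly as follows, assuming a
 * DRM file descriptor fd and a GEM handle obtained elsewhere.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *	// failure with errno == ENODEV means snooped/LLC caching is
 *	// not supported on this platform
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
 *	// on success, arg.caching reports the level now in effect
 */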

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(obj, &ww);
	if (ret)
		goto err;
	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		goto err;

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

err:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (ret)
		return ERR_PTR(ret);

	return vma;
}
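
/*
 * Illustrative sketch only (not part of the original file): a minimal
 * scanout pin/unpin cycle built on the helper above. The helper name,
 * the zero alignment/flags and the NULL (normal) GGTT view are assumptions
 * for illustration; real display code supplies proper values.
 */
static int __maybe_unused example_pin_scanout(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	vma = i915_gem_object_pin_to_display_plane(obj, 0, NULL, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... program the plane to scan out from i915_ggtt_offset(vma) ... */

	i915_vma_unpin(vma);
	return 0;
}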

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
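
/*
 * Illustrative userspace usage (not part of this file): before touching a
 * GEM object through a CPU mmap, userspace typically issues the set-domain
 * ioctl handled above, e.g. for coherent CPU writes:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * fd and handle are assumed to come from the surrounding application code.
 */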

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
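
/*
 * Illustrative sketch only (not part of the original file): the intended
 * calling pattern for i915_gem_object_prepare_write(), as used by pwrite
 * style paths. The helper name and the plain (non-ww) locking are
 * assumptions for illustration; the actual copy into the pages is elided.
 */
static int __maybe_unused example_cpu_write_access(struct drm_i915_gem_object *obj)
{
	unsigned int needs_clflush;
	int err;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		return err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		goto unlock;

	/*
	 * ... write to the object's pages here, flushing each touched range
	 * before/after the copy as directed by CLFLUSH_BEFORE /
	 * CLFLUSH_AFTER in needs_clflush ...
	 */

	/* prepare_write returned with the pages pinned; release them. */
	i915_gem_object_unpin_pages(obj);
unlock:
	i915_gem_object_unlock(obj);
	return err;
}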