/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!READ_ONCE(obj->pin_global))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
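
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * the WC/GTT domain movers above must be called under the object lock, so a
 * typical caller brackets them with i915_gem_object_lock()/unlock(), much as
 * i915_gem_set_domain_ioctl() does further below.
 */
static int __maybe_unused example_move_to_gtt_domain(struct drm_i915_gem_object *obj,
						     bool write)
{
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, write);
	i915_gem_object_unlock(obj);

	return err;
}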

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	assert_object_held(obj);

	if (obj->cache_level == cache_level)
		return 0;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma.list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (!i915_vma_is_closed(vma) &&
		    i915_gem_valid_gtt_space(vma, cache_level))
			continue;

		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;

		/* As unbinding may affect other elements in the
		 * obj->vma.list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon
	 * less state and so involves less work.
	 */
	if (atomic_read(&obj->bind_count)) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;

		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
			intel_wakeref_t wakeref =
				intel_runtime_pm_get(&i915->runtime_pm);

			/*
			 * Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
			if (ret) {
				intel_runtime_pm_put(&i915->runtime_pm,
						     wakeref);
				return ret;
			}

			if (obj->userfault_count)
				__i915_gem_object_release_mmap(obj);

			/*
			 * As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			for_each_ggtt_vma(vma, obj) {
				ret = i915_vma_revoke_fence(vma);
				if (ret)
					break;
			}
			mutex_unlock(&i915->ggtt.vm.mutex);
			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
			if (ret)
				return ret;
		} else {
			/*
			 * We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
		}

		list_for_each_entry(vma, &obj->vma.list, obj_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
			if (ret)
				return ret;
		}
	}

	list_for_each_entry(vma, &obj->vma.list, obj_link)
		vma->node.color = cache_level;
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0;
}
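
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * changing the cache level from outside the display path follows the same
 * locking as i915_gem_set_caching_ioctl() below, taking struct_mutex first
 * and then the object lock.
 */
static int __maybe_unused example_set_cache_level(struct drm_i915_gem_object *obj,
						  enum i915_cache_level level)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int err;

	err = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (err)
		return err;

	err = i915_gem_object_lock_interruptible(obj);
	if (err == 0) {
		err = i915_gem_object_set_cache_level(obj, level);
		i915_gem_object_unlock(obj);
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return err;
}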

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	if (obj->cache_level == level)
		goto out;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		goto out;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret == 0) {
		ret = i915_gem_object_set_cache_level(obj, level);
		i915_gem_object_unlock(obj);
	}
	mutex_unlock(&i915->drm.struct_mutex);

out:
	i915_gem_object_put(obj);
	return ret;
}
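
/*
 * Illustrative userspace sketch (not part of this file), assuming an open DRM
 * fd and a GEM handle owned by the caller: the set-caching ioctl above is
 * reached via libdrm roughly as follows.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */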

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct i915_vma *vma;
	int ret;

	assert_object_held(obj);

	/* Mark the global pin early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	obj->pin_global++;

	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(to_i915(obj->base.dev)) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret) {
		vma = ERR_PTR(ret);
		goto err_unpin_global;
	}

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		goto err_unpin_global;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	__i915_gem_object_flush_for_display(obj);

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->read_domains |= I915_GEM_DOMAIN_GTT;

	return vma;

err_unpin_global:
	obj->pin_global--;
	return vma;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	mutex_lock(&i915->ggtt.vm.mutex);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	if (WARN_ON(obj->pin_global == 0))
		return;

	if (--obj->pin_global == 0)
		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
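
/*
 * Illustrative userspace sketch (not part of this file), assuming an open DRM
 * fd and a GEM handle owned by the caller: asking for CPU read/write access
 * before mmaping and filling a buffer looks roughly like this via libdrm.
 *
 *	struct drm_i915_gem_set_domain set = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set);
 */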

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourselves into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourselves into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
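
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * both prepare helpers above return with the object locked and its pages
 * pinned, so a caller is expected to drop both once it has finished with
 * the pages, roughly as follows.
 */
static int __maybe_unused example_prepare_cpu_read(struct drm_i915_gem_object *obj)
{
	unsigned int needs_clflush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (err)
		return err;

	/*
	 * Access the backing pages here; if CLFLUSH_BEFORE is set in
	 * needs_clflush, the affected cachelines must be flushed before
	 * the data is sampled.
	 */

	i915_gem_object_unpin_pages(obj);
	i915_gem_object_unlock(obj);

	return 0;
}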