1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2014-2016 Intel Corporation 5 */ 6 7 #include "display/intel_frontbuffer.h" 8 9 #include "i915_drv.h" 10 #include "i915_gem_clflush.h" 11 #include "i915_gem_gtt.h" 12 #include "i915_gem_ioctls.h" 13 #include "i915_gem_object.h" 14 #include "i915_vma.h" 15 16 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 17 { 18 /* 19 * We manually flush the CPU domain so that we can override and 20 * force the flush for the display, and perform it asyncrhonously. 21 */ 22 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 23 if (obj->cache_dirty) 24 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 25 obj->write_domain = 0; 26 } 27 28 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 29 { 30 if (!i915_gem_object_is_framebuffer(obj)) 31 return; 32 33 i915_gem_object_lock(obj); 34 __i915_gem_object_flush_for_display(obj); 35 i915_gem_object_unlock(obj); 36 } 37 38 /** 39 * Moves a single object to the WC read, and possibly write domain. 40 * @obj: object to act on 41 * @write: ask for write access or read only 42 * 43 * This function returns when the move is complete, including waiting on 44 * flushes to occur. 45 */ 46 int 47 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 48 { 49 int ret; 50 51 assert_object_held(obj); 52 53 ret = i915_gem_object_wait(obj, 54 I915_WAIT_INTERRUPTIBLE | 55 (write ? I915_WAIT_ALL : 0), 56 MAX_SCHEDULE_TIMEOUT); 57 if (ret) 58 return ret; 59 60 if (obj->write_domain == I915_GEM_DOMAIN_WC) 61 return 0; 62 63 /* Flush and acquire obj->pages so that we are coherent through 64 * direct access in memory with previous cached writes through 65 * shmemfs and that our cache domain tracking remains valid. 66 * For example, if the obj->filp was moved to swap without us 67 * being notified and releasing the pages, we would mistakenly 68 * continue to assume that the obj remained out of the CPU cached 69 * domain. 70 */ 71 ret = i915_gem_object_pin_pages(obj); 72 if (ret) 73 return ret; 74 75 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 76 77 /* Serialise direct access to this object with the barriers for 78 * coherent writes from the GPU, by effectively invalidating the 79 * WC domain upon first access. 80 */ 81 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 82 mb(); 83 84 /* It should now be out of any other write domains, and we can update 85 * the domain values for our changes. 86 */ 87 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 88 obj->read_domains |= I915_GEM_DOMAIN_WC; 89 if (write) { 90 obj->read_domains = I915_GEM_DOMAIN_WC; 91 obj->write_domain = I915_GEM_DOMAIN_WC; 92 obj->mm.dirty = true; 93 } 94 95 i915_gem_object_unpin_pages(obj); 96 return 0; 97 } 98 99 /** 100 * Moves a single object to the GTT read, and possibly write domain. 101 * @obj: object to act on 102 * @write: ask for write access or read only 103 * 104 * This function returns when the move is complete, including waiting on 105 * flushes to occur. 106 */ 107 int 108 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 109 { 110 int ret; 111 112 assert_object_held(obj); 113 114 ret = i915_gem_object_wait(obj, 115 I915_WAIT_INTERRUPTIBLE | 116 (write ? I915_WAIT_ALL : 0), 117 MAX_SCHEDULE_TIMEOUT); 118 if (ret) 119 return ret; 120 121 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 122 return 0; 123 124 /* Flush and acquire obj->pages so that we are coherent through 125 * direct access in memory with previous cached writes through 126 * shmemfs and that our cache domain tracking remains valid. 127 * For example, if the obj->filp was moved to swap without us 128 * being notified and releasing the pages, we would mistakenly 129 * continue to assume that the obj remained out of the CPU cached 130 * domain. 131 */ 132 ret = i915_gem_object_pin_pages(obj); 133 if (ret) 134 return ret; 135 136 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 137 138 /* Serialise direct access to this object with the barriers for 139 * coherent writes from the GPU, by effectively invalidating the 140 * GTT domain upon first access. 141 */ 142 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 143 mb(); 144 145 /* It should now be out of any other write domains, and we can update 146 * the domain values for our changes. 147 */ 148 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 149 obj->read_domains |= I915_GEM_DOMAIN_GTT; 150 if (write) { 151 obj->read_domains = I915_GEM_DOMAIN_GTT; 152 obj->write_domain = I915_GEM_DOMAIN_GTT; 153 obj->mm.dirty = true; 154 } 155 156 i915_gem_object_unpin_pages(obj); 157 return 0; 158 } 159 160 /** 161 * Changes the cache-level of an object across all VMA. 162 * @obj: object to act on 163 * @cache_level: new cache level to set for the object 164 * 165 * After this function returns, the object will be in the new cache-level 166 * across all GTT and the contents of the backing storage will be coherent, 167 * with respect to the new cache-level. In order to keep the backing storage 168 * coherent for all users, we only allow a single cache level to be set 169 * globally on the object and prevent it from being changed whilst the 170 * hardware is reading from the object. That is if the object is currently 171 * on the scanout it will be set to uncached (or equivalent display 172 * cache coherency) and all non-MOCS GPU access will also be uncached so 173 * that all direct access to the scanout remains coherent. 174 */ 175 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 176 enum i915_cache_level cache_level) 177 { 178 struct i915_vma *vma; 179 int ret; 180 181 assert_object_held(obj); 182 183 if (obj->cache_level == cache_level) 184 return 0; 185 186 /* Inspect the list of currently bound VMA and unbind any that would 187 * be invalid given the new cache-level. This is principally to 188 * catch the issue of the CS prefetch crossing page boundaries and 189 * reading an invalid PTE on older architectures. 190 */ 191 restart: 192 list_for_each_entry(vma, &obj->vma.list, obj_link) { 193 if (!drm_mm_node_allocated(&vma->node)) 194 continue; 195 196 if (i915_vma_is_pinned(vma)) { 197 DRM_DEBUG("can not change the cache level of pinned objects\n"); 198 return -EBUSY; 199 } 200 201 if (!i915_vma_is_closed(vma) && 202 i915_gem_valid_gtt_space(vma, cache_level)) 203 continue; 204 205 ret = i915_vma_unbind(vma); 206 if (ret) 207 return ret; 208 209 /* As unbinding may affect other elements in the 210 * obj->vma_list (due to side-effects from retiring 211 * an active vma), play safe and restart the iterator. 212 */ 213 goto restart; 214 } 215 216 /* We can reuse the existing drm_mm nodes but need to change the 217 * cache-level on the PTE. We could simply unbind them all and 218 * rebind with the correct cache-level on next use. However since 219 * we already have a valid slot, dma mapping, pages etc, we may as 220 * rewrite the PTE in the belief that doing so tramples upon less 221 * state and so involves less work. 222 */ 223 if (atomic_read(&obj->bind_count)) { 224 struct drm_i915_private *i915 = to_i915(obj->base.dev); 225 226 /* Before we change the PTE, the GPU must not be accessing it. 227 * If we wait upon the object, we know that all the bound 228 * VMA are no longer active. 229 */ 230 ret = i915_gem_object_wait(obj, 231 I915_WAIT_INTERRUPTIBLE | 232 I915_WAIT_ALL, 233 MAX_SCHEDULE_TIMEOUT); 234 if (ret) 235 return ret; 236 237 if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) { 238 intel_wakeref_t wakeref = 239 intel_runtime_pm_get(&i915->runtime_pm); 240 241 /* 242 * Access to snoopable pages through the GTT is 243 * incoherent and on some machines causes a hard 244 * lockup. Relinquish the CPU mmaping to force 245 * userspace to refault in the pages and we can 246 * then double check if the GTT mapping is still 247 * valid for that pointer access. 248 */ 249 ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex); 250 if (ret) { 251 intel_runtime_pm_put(&i915->runtime_pm, 252 wakeref); 253 return ret; 254 } 255 256 if (obj->userfault_count) 257 __i915_gem_object_release_mmap(obj); 258 259 /* 260 * As we no longer need a fence for GTT access, 261 * we can relinquish it now (and so prevent having 262 * to steal a fence from someone else on the next 263 * fence request). Note GPU activity would have 264 * dropped the fence as all snoopable access is 265 * supposed to be linear. 266 */ 267 for_each_ggtt_vma(vma, obj) { 268 ret = i915_vma_revoke_fence(vma); 269 if (ret) 270 break; 271 } 272 mutex_unlock(&i915->ggtt.vm.mutex); 273 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 274 if (ret) 275 return ret; 276 } else { 277 /* 278 * We either have incoherent backing store and 279 * so no GTT access or the architecture is fully 280 * coherent. In such cases, existing GTT mmaps 281 * ignore the cache bit in the PTE and we can 282 * rewrite it without confusing the GPU or having 283 * to force userspace to fault back in its mmaps. 284 */ 285 } 286 287 list_for_each_entry(vma, &obj->vma.list, obj_link) { 288 if (!drm_mm_node_allocated(&vma->node)) 289 continue; 290 291 /* Wait for an earlier async bind, need to rewrite it */ 292 ret = i915_vma_sync(vma); 293 if (ret) 294 return ret; 295 296 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); 297 if (ret) 298 return ret; 299 } 300 } 301 302 list_for_each_entry(vma, &obj->vma.list, obj_link) { 303 if (i915_vm_has_cache_coloring(vma->vm)) 304 vma->node.color = cache_level; 305 } 306 i915_gem_object_set_cache_coherency(obj, cache_level); 307 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 308 309 return 0; 310 } 311 312 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 313 struct drm_file *file) 314 { 315 struct drm_i915_gem_caching *args = data; 316 struct drm_i915_gem_object *obj; 317 int err = 0; 318 319 rcu_read_lock(); 320 obj = i915_gem_object_lookup_rcu(file, args->handle); 321 if (!obj) { 322 err = -ENOENT; 323 goto out; 324 } 325 326 switch (obj->cache_level) { 327 case I915_CACHE_LLC: 328 case I915_CACHE_L3_LLC: 329 args->caching = I915_CACHING_CACHED; 330 break; 331 332 case I915_CACHE_WT: 333 args->caching = I915_CACHING_DISPLAY; 334 break; 335 336 default: 337 args->caching = I915_CACHING_NONE; 338 break; 339 } 340 out: 341 rcu_read_unlock(); 342 return err; 343 } 344 345 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 346 struct drm_file *file) 347 { 348 struct drm_i915_private *i915 = to_i915(dev); 349 struct drm_i915_gem_caching *args = data; 350 struct drm_i915_gem_object *obj; 351 enum i915_cache_level level; 352 int ret = 0; 353 354 switch (args->caching) { 355 case I915_CACHING_NONE: 356 level = I915_CACHE_NONE; 357 break; 358 case I915_CACHING_CACHED: 359 /* 360 * Due to a HW issue on BXT A stepping, GPU stores via a 361 * snooped mapping may leave stale data in a corresponding CPU 362 * cacheline, whereas normally such cachelines would get 363 * invalidated. 364 */ 365 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 366 return -ENODEV; 367 368 level = I915_CACHE_LLC; 369 break; 370 case I915_CACHING_DISPLAY: 371 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 372 break; 373 default: 374 return -EINVAL; 375 } 376 377 obj = i915_gem_object_lookup(file, args->handle); 378 if (!obj) 379 return -ENOENT; 380 381 /* 382 * The caching mode of proxy object is handled by its generator, and 383 * not allowed to be changed by userspace. 384 */ 385 if (i915_gem_object_is_proxy(obj)) { 386 ret = -ENXIO; 387 goto out; 388 } 389 390 if (obj->cache_level == level) 391 goto out; 392 393 ret = i915_gem_object_wait(obj, 394 I915_WAIT_INTERRUPTIBLE, 395 MAX_SCHEDULE_TIMEOUT); 396 if (ret) 397 goto out; 398 399 ret = i915_gem_object_lock_interruptible(obj); 400 if (ret == 0) { 401 ret = i915_gem_object_set_cache_level(obj, level); 402 i915_gem_object_unlock(obj); 403 } 404 405 out: 406 i915_gem_object_put(obj); 407 return ret; 408 } 409 410 /* 411 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 412 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 413 * (for pageflips). We only flush the caches while preparing the buffer for 414 * display, the callers are responsible for frontbuffer flush. 415 */ 416 struct i915_vma * 417 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 418 u32 alignment, 419 const struct i915_ggtt_view *view, 420 unsigned int flags) 421 { 422 struct i915_vma *vma; 423 int ret; 424 425 assert_object_held(obj); 426 427 /* 428 * The display engine is not coherent with the LLC cache on gen6. As 429 * a result, we make sure that the pinning that is about to occur is 430 * done with uncached PTEs. This is lowest common denominator for all 431 * chipsets. 432 * 433 * However for gen6+, we could do better by using the GFDT bit instead 434 * of uncaching, which would allow us to flush all the LLC-cached data 435 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 436 */ 437 ret = i915_gem_object_set_cache_level(obj, 438 HAS_WT(to_i915(obj->base.dev)) ? 439 I915_CACHE_WT : I915_CACHE_NONE); 440 if (ret) 441 return ERR_PTR(ret); 442 443 /* 444 * As the user may map the buffer once pinned in the display plane 445 * (e.g. libkms for the bootup splash), we have to ensure that we 446 * always use map_and_fenceable for all scanout buffers. However, 447 * it may simply be too big to fit into mappable, in which case 448 * put it anyway and hope that userspace can cope (but always first 449 * try to preserve the existing ABI). 450 */ 451 vma = ERR_PTR(-ENOSPC); 452 if ((flags & PIN_MAPPABLE) == 0 && 453 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 454 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 455 flags | 456 PIN_MAPPABLE | 457 PIN_NONBLOCK); 458 if (IS_ERR(vma)) 459 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 460 if (IS_ERR(vma)) 461 return vma; 462 463 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 464 465 __i915_gem_object_flush_for_display(obj); 466 467 /* 468 * It should now be out of any other write domains, and we can update 469 * the domain values for our changes. 470 */ 471 obj->read_domains |= I915_GEM_DOMAIN_GTT; 472 473 return vma; 474 } 475 476 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 477 { 478 struct drm_i915_private *i915 = to_i915(obj->base.dev); 479 struct i915_vma *vma; 480 481 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 482 483 mutex_lock(&i915->ggtt.vm.mutex); 484 for_each_ggtt_vma(vma, obj) { 485 if (!drm_mm_node_allocated(&vma->node)) 486 continue; 487 488 GEM_BUG_ON(vma->vm != &i915->ggtt.vm); 489 list_move_tail(&vma->vm_link, &vma->vm->bound_list); 490 } 491 mutex_unlock(&i915->ggtt.vm.mutex); 492 493 if (i915_gem_object_is_shrinkable(obj)) { 494 unsigned long flags; 495 496 spin_lock_irqsave(&i915->mm.obj_lock, flags); 497 498 if (obj->mm.madv == I915_MADV_WILLNEED && 499 !atomic_read(&obj->mm.shrink_pin)) 500 list_move_tail(&obj->mm.link, &i915->mm.shrink_list); 501 502 spin_unlock_irqrestore(&i915->mm.obj_lock, flags); 503 } 504 } 505 506 void 507 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 508 { 509 struct drm_i915_gem_object *obj = vma->obj; 510 511 assert_object_held(obj); 512 513 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 514 i915_gem_object_bump_inactive_ggtt(obj); 515 516 i915_vma_unpin(vma); 517 } 518 519 /** 520 * Moves a single object to the CPU read, and possibly write domain. 521 * @obj: object to act on 522 * @write: requesting write or read-only access 523 * 524 * This function returns when the move is complete, including waiting on 525 * flushes to occur. 526 */ 527 int 528 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 529 { 530 int ret; 531 532 assert_object_held(obj); 533 534 ret = i915_gem_object_wait(obj, 535 I915_WAIT_INTERRUPTIBLE | 536 (write ? I915_WAIT_ALL : 0), 537 MAX_SCHEDULE_TIMEOUT); 538 if (ret) 539 return ret; 540 541 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 542 543 /* Flush the CPU cache if it's still invalid. */ 544 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 545 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 546 obj->read_domains |= I915_GEM_DOMAIN_CPU; 547 } 548 549 /* It should now be out of any other write domains, and we can update 550 * the domain values for our changes. 551 */ 552 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 553 554 /* If we're writing through the CPU, then the GPU read domains will 555 * need to be invalidated at next use. 556 */ 557 if (write) 558 __start_cpu_write(obj); 559 560 return 0; 561 } 562 563 /** 564 * Called when user space prepares to use an object with the CPU, either 565 * through the mmap ioctl's mapping or a GTT mapping. 566 * @dev: drm device 567 * @data: ioctl data blob 568 * @file: drm file 569 */ 570 int 571 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 572 struct drm_file *file) 573 { 574 struct drm_i915_gem_set_domain *args = data; 575 struct drm_i915_gem_object *obj; 576 u32 read_domains = args->read_domains; 577 u32 write_domain = args->write_domain; 578 int err; 579 580 /* Only handle setting domains to types used by the CPU. */ 581 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 582 return -EINVAL; 583 584 /* 585 * Having something in the write domain implies it's in the read 586 * domain, and only that read domain. Enforce that in the request. 587 */ 588 if (write_domain && read_domains != write_domain) 589 return -EINVAL; 590 591 if (!read_domains) 592 return 0; 593 594 obj = i915_gem_object_lookup(file, args->handle); 595 if (!obj) 596 return -ENOENT; 597 598 /* 599 * Already in the desired write domain? Nothing for us to do! 600 * 601 * We apply a little bit of cunning here to catch a broader set of 602 * no-ops. If obj->write_domain is set, we must be in the same 603 * obj->read_domains, and only that domain. Therefore, if that 604 * obj->write_domain matches the request read_domains, we are 605 * already in the same read/write domain and can skip the operation, 606 * without having to further check the requested write_domain. 607 */ 608 if (READ_ONCE(obj->write_domain) == read_domains) { 609 err = 0; 610 goto out; 611 } 612 613 /* 614 * Try to flush the object off the GPU without holding the lock. 615 * We will repeat the flush holding the lock in the normal manner 616 * to catch cases where we are gazumped. 617 */ 618 err = i915_gem_object_wait(obj, 619 I915_WAIT_INTERRUPTIBLE | 620 I915_WAIT_PRIORITY | 621 (write_domain ? I915_WAIT_ALL : 0), 622 MAX_SCHEDULE_TIMEOUT); 623 if (err) 624 goto out; 625 626 /* 627 * Proxy objects do not control access to the backing storage, ergo 628 * they cannot be used as a means to manipulate the cache domain 629 * tracking for that backing storage. The proxy object is always 630 * considered to be outside of any cache domain. 631 */ 632 if (i915_gem_object_is_proxy(obj)) { 633 err = -ENXIO; 634 goto out; 635 } 636 637 /* 638 * Flush and acquire obj->pages so that we are coherent through 639 * direct access in memory with previous cached writes through 640 * shmemfs and that our cache domain tracking remains valid. 641 * For example, if the obj->filp was moved to swap without us 642 * being notified and releasing the pages, we would mistakenly 643 * continue to assume that the obj remained out of the CPU cached 644 * domain. 645 */ 646 err = i915_gem_object_pin_pages(obj); 647 if (err) 648 goto out; 649 650 err = i915_gem_object_lock_interruptible(obj); 651 if (err) 652 goto out_unpin; 653 654 if (read_domains & I915_GEM_DOMAIN_WC) 655 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 656 else if (read_domains & I915_GEM_DOMAIN_GTT) 657 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 658 else 659 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 660 661 /* And bump the LRU for this access */ 662 i915_gem_object_bump_inactive_ggtt(obj); 663 664 i915_gem_object_unlock(obj); 665 666 if (write_domain) 667 intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); 668 669 out_unpin: 670 i915_gem_object_unpin_pages(obj); 671 out: 672 i915_gem_object_put(obj); 673 return err; 674 } 675 676 /* 677 * Pins the specified object's pages and synchronizes the object with 678 * GPU accesses. Sets needs_clflush to non-zero if the caller should 679 * flush the object from the CPU cache. 680 */ 681 int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, 682 unsigned int *needs_clflush) 683 { 684 int ret; 685 686 *needs_clflush = 0; 687 if (!i915_gem_object_has_struct_page(obj)) 688 return -ENODEV; 689 690 ret = i915_gem_object_lock_interruptible(obj); 691 if (ret) 692 return ret; 693 694 ret = i915_gem_object_wait(obj, 695 I915_WAIT_INTERRUPTIBLE, 696 MAX_SCHEDULE_TIMEOUT); 697 if (ret) 698 goto err_unlock; 699 700 ret = i915_gem_object_pin_pages(obj); 701 if (ret) 702 goto err_unlock; 703 704 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 705 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 706 ret = i915_gem_object_set_to_cpu_domain(obj, false); 707 if (ret) 708 goto err_unpin; 709 else 710 goto out; 711 } 712 713 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 714 715 /* If we're not in the cpu read domain, set ourself into the gtt 716 * read domain and manually flush cachelines (if required). This 717 * optimizes for the case when the gpu will dirty the data 718 * anyway again before the next pread happens. 719 */ 720 if (!obj->cache_dirty && 721 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 722 *needs_clflush = CLFLUSH_BEFORE; 723 724 out: 725 /* return with the pages pinned */ 726 return 0; 727 728 err_unpin: 729 i915_gem_object_unpin_pages(obj); 730 err_unlock: 731 i915_gem_object_unlock(obj); 732 return ret; 733 } 734 735 int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, 736 unsigned int *needs_clflush) 737 { 738 int ret; 739 740 *needs_clflush = 0; 741 if (!i915_gem_object_has_struct_page(obj)) 742 return -ENODEV; 743 744 ret = i915_gem_object_lock_interruptible(obj); 745 if (ret) 746 return ret; 747 748 ret = i915_gem_object_wait(obj, 749 I915_WAIT_INTERRUPTIBLE | 750 I915_WAIT_ALL, 751 MAX_SCHEDULE_TIMEOUT); 752 if (ret) 753 goto err_unlock; 754 755 ret = i915_gem_object_pin_pages(obj); 756 if (ret) 757 goto err_unlock; 758 759 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 760 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 761 ret = i915_gem_object_set_to_cpu_domain(obj, true); 762 if (ret) 763 goto err_unpin; 764 else 765 goto out; 766 } 767 768 i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 769 770 /* If we're not in the cpu write domain, set ourself into the 771 * gtt write domain and manually flush cachelines (as required). 772 * This optimizes for the case when the gpu will use the data 773 * right away and we therefore have to clflush anyway. 774 */ 775 if (!obj->cache_dirty) { 776 *needs_clflush |= CLFLUSH_AFTER; 777 778 /* 779 * Same trick applies to invalidate partially written 780 * cachelines read before writing. 781 */ 782 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 783 *needs_clflush |= CLFLUSH_BEFORE; 784 } 785 786 out: 787 intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); 788 obj->mm.dirty = true; 789 /* return with the pages pinned */ 790 return 0; 791 792 err_unpin: 793 i915_gem_object_unpin_pages(obj); 794 err_unlock: 795 i915_gem_object_unlock(obj); 796 return ret; 797 } 798