/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD	(168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

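/*
 * Flush any writes pending in the object's current write domain before it
 * changes hands: GTT writes are drained from the GGTT write-combine buffer
 * and frontbuffer tracking is notified, WC writes only need a write barrier,
 * CPU writes are clflushed, and GPU (render) writes just mark the object
 * cache-dirty when the cache level requires a clflush before CPU access.
 */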
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

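/*
 * The domain setters below (WC, GTT and CPU) share the same contract: the
 * caller holds the object lock, the object is first waited upon (all access
 * for a write, otherwise just pending writes), any writes in the old domain
 * are flushed, and the read/write domain tracking is updated. An illustrative
 * caller, assuming it already holds a reference to the object, would look
 * roughly like:
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_gtt_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */
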
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 *                                    and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

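		/*
		 * Remember which GGTT vmas now carry write-combined writes so
		 * that a later flush_write_domain() knows to drain them with
		 * intel_gt_flush_ggtt_writes().
		 */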
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 *                                   across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout, it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

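	/*
	 * Report the uapi caching mode that most closely matches the
	 * internal cache level: LLC and L3+LLC map to CACHED, write-through
	 * to DISPLAY, and everything else to NONE.
	 */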
	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
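	/*
	 * If the mappable pin failed, retry without PIN_MAPPABLE, but let a
	 * ww-mutex deadlock (-EDEADLK) propagate so the caller can back off
	 * and restart its locking transaction.
	 */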
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 *                             object with the CPU, either through the mmap
 *                             ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages; iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

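	/*
	 * If writes are already coherent with the CPU cache (LLC or snooped),
	 * or the CPU cannot clflush at all, simply move the object into the
	 * CPU write domain instead of tracking manual cacheline flushes.
	 */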
	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}