/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

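/*
 * The i915_gem_object_set_to_{wc,gtt,cpu}_domain() helpers below share the
 * same basic shape: wait for outstanding work, flush any conflicting write
 * domain, invalidate the new domain on first access (mb() or clflush), and
 * finally update the read/write domain tracking on the object.
 *
 * Illustrative usage sketch (not taken from this file): callers must already
 * hold the object lock, as asserted by assert_object_held(), e.g. roughly
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_gtt_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */
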
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

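/*
 * Note that i915_vma_set_ggtt_write() above only marks GGTT-bound vmas as
 * carrying pending GTT writes; the matching i915_vma_unset_ggtt_write() in
 * flush_write_domain() is what decides whether intel_gt_flush_ggtt_writes()
 * must be issued when the object later leaves the GTT write domain.
 */
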
/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

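/*
 * The two caching ioctls below are reached from userspace via
 * DRM_IOCTL_I915_GEM_GET_CACHING / DRM_IOCTL_I915_GEM_SET_CACHING with a
 * struct drm_i915_gem_caching. A rough, illustrative call (not from this
 * file) would be:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * Both ioctls return -ENODEV on discrete (DGFX) parts.
 */
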
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

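/*
 * Scanout pinning below proceeds in three steps: the object is moved to a
 * display-coherent cache level (WT where available, otherwise uncached);
 * when intel_scanout_needs_vtd_wa() applies, the vma is padded with scratch
 * pages via PIN_OFFSET_GUARD (VTD_GUARD, or a full tile row for tiled
 * objects) so that display overfetch never touches unmapped PTEs; and a
 * mappable GGTT pin is attempted first before falling back to any GGTT
 * placement.
 */
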
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

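/*
 * The set-domain ioctl below is reached from userspace via
 * DRM_IOCTL_I915_GEM_SET_DOMAIN with a struct drm_i915_gem_set_domain. A
 * rough, illustrative call (not from this file) asking for CPU read/write
 * access would be:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */
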
/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

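/*
 * Rough usage sketch for the prepare helpers below (illustrative, not taken
 * from this file): a pread/pwrite-style path would do something like
 *
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *	... copy the data, flushing the affected range first when
 *	    needs_clflush & CLFLUSH_BEFORE is set ...
 *	i915_gem_object_finish_access(obj);
 *
 * The pages are returned pinned; i915_gem_object_finish_access() (assumed
 * here, declared alongside these helpers) drops that pin again.
 */
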
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}