/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}
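
/*
 * Illustrative sketch (not part of the original file): the two helpers
 * above differ only in who takes the object lock. The plain variant
 * acquires and drops the lock around the flush itself, so a caller that
 * already holds the lock, e.g. as part of a larger ww transaction, uses
 * the _locked() variant instead:
 *
 *	i915_gem_object_lock(obj, NULL);
 *	i915_gem_object_flush_if_display_locked(obj);
 *	i915_gem_object_unlock(obj);
 */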

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
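
/*
 * Illustrative caller sketch (an assumption, not code from this file):
 * the set-to-domain helpers assert that the object lock is held and do
 * their own waiting for outstanding rendering, so a minimal single-shot
 * use is simply lock, move, unlock:
 *
 *	err = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (err)
 *		return err;
 *
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */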

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
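
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * the two ioctls above are driven through struct drm_i915_gem_caching
 * from include/uapi/drm/i915_drm.h, e.g. with libdrm:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * Requesting snooping on hardware without LLC or snoop support, or using
 * either ioctl on discrete parts, fails with -ENODEV as implemented above.
 */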

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However, for gen6+ we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}
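
/*
 * Illustrative caller sketch (an assumption, not code from this file):
 * display code is expected to call the helper above with the object
 * locked as part of a ww transaction, retrying on -EDEADLK; the local
 * names below are hypothetical:
 *
 *	for_i915_gem_ww(&ww, ret, true) {
 *		ret = i915_gem_object_lock(obj, &ww);
 *		if (ret)
 *			continue;
 *
 *		vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *							   view, flags);
 *		if (IS_ERR(vma))
 *			ret = PTR_ERR(vma);
 *	}
 */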

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages; iris uses set_domain to check
		 * userptr validity.
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourselves into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourselves into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
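
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * i915_gem_set_domain_ioctl() above is reached through struct
 * drm_i915_gem_set_domain from include/uapi/drm/i915_drm.h. A client
 * preparing a buffer for CPU writes through an mmap would issue e.g.:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * Passing any GPU domain or mismatched read/write domains fails with
 * -EINVAL, a proxy object fails with -ENXIO, and the whole ioctl is
 * unavailable (-ENODEV) on discrete parts, as implemented above.
 */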