/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD	(168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

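/*
 * Flush any writes pending in @obj's current write domain, provided that
 * domain is selected by @flush_domains: GGTT writes are flushed through the
 * GT and the frontbuffer, WC writes with a write barrier, CPU writes with a
 * clflush, and GPU (render) writes simply leave the CPU cache marked dirty
 * when a clflush will later be needed. Clears obj->write_domain. The caller
 * must hold the object lock.
 */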
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects have an immutable cache
	 * setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

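/*
 * Report the object's current caching mode (I915_CACHING_NONE, _CACHED or
 * _DISPLAY) back to userspace. Not available on discrete GPUs, nor for
 * objects whose PAT index was set directly by userspace.
 */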
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

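/*
 * Change the object's caching mode on behalf of userspace. Not available on
 * discrete GPUs or on graphics IP version 12.70 and newer, and refused for
 * objects whose PAT index was set directly by userspace; proxy objects are
 * also rejected, except for userptr objects asking for I915_CACHING_CACHED.
 */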
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

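/*
 * Illustrative userspace usage (a sketch only, not part of the driver): a
 * client that wants coherent CPU access through an existing CPU mmap would
 * typically issue something like
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * before touching the pages; with a write domain set, the frontbuffer is
 * invalidated on the client's behalf above.
 */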
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

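/*
 * As i915_gem_object_prepare_read(), but synchronizes against all GPU access
 * (readers and writers) before a CPU write, marks the pages dirty and
 * invalidates the frontbuffer, and sets CLFLUSH_BEFORE and/or CLFLUSH_AFTER
 * in *needs_clflush as required around the caller's write.
 */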
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}