/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD	(168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here would fall back to
	 * checking whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

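/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * already holds the object lock uses the _locked variant above, while the
 * plain i915_gem_object_flush_if_display() takes and drops the lock itself,
 * e.g.:
 *
 *	i915_gem_object_lock(obj, NULL);
 *	... update the framebuffer contents ...
 *	i915_gem_object_flush_if_display_locked(obj);
 *	i915_gem_object_unlock(obj);
 */
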
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

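/*
 * Usage sketch (illustrative only): like the other domain moves in this file,
 * the WC move must be called with the object lock held, e.g.:
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */
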
/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

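/*
 * Illustrative note (sketch based on callers within this file): the
 * set-caching ioctl below and i915_gem_object_pin_to_display_plane() both
 * funnel through i915_gem_object_set_cache_level() under the object lock,
 * e.g.:
 *
 *	ret = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *	i915_gem_object_unlock(obj);
 *
 * The new PTE caching bits only take effect once each vma is rebound.
 */
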
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

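/*
 * Userspace-facing note (illustrative, not from the original file): the two
 * ioctls above back DRM_IOCTL_I915_GEM_GET_CACHING and
 * DRM_IOCTL_I915_GEM_SET_CACHING; a client fills struct drm_i915_gem_caching
 * with an object handle and an I915_CACHING_* value. Both are
 * integrated-only and return -ENODEV on discrete parts (IS_DGFX).
 */
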
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

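/*
 * Usage sketch (illustrative; the ww transaction is assumed to be set up by
 * the display pinning code that calls in here):
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *						   view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * On success the returned vma is pinned in the GGTT, marked as scanout, and
 * any cache flush required for display has been issued.
 */
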
/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a
 * GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the requested read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

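/*
 * Userspace-facing note (illustrative): this backs DRM_IOCTL_I915_GEM_SET_DOMAIN.
 * A client sets read_domains (and, for writes, write_domain to the same value)
 * to one of I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_GTT or I915_GEM_DOMAIN_WC
 * before touching the object through an mmap or GTT mapping; as noted above,
 * iris also issues it on userptr objects purely to validate them.
 */
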
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

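/*
 * Calling-pattern sketch (illustrative; mirrors the shmem pread path
 * elsewhere in the driver): with the object lock held,
 *
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *	... copy out the pages, clflushing first if
 *	    needs_clflush & CLFLUSH_BEFORE ...
 *	i915_gem_object_unpin_pages(obj);
 */
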
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

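/*
 * Calling-pattern sketch (illustrative; mirrors the shmem pwrite path): with
 * the object lock held,
 *
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *	... write the pages, clflushing around the copy as directed by
 *	    CLFLUSH_BEFORE / CLFLUSH_AFTER in needs_clflush ...
 *	i915_gem_object_unpin_pages(obj);
 */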