xref: /openbmc/linux/drivers/gpu/drm/i915/gem/i915_gem_domain.c (revision c0d3b83100c896e1b0909023df58a0ebdd428d61)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2016 Intel Corporation
5  */
6 
7 #include "display/intel_frontbuffer.h"
8 #include "gt/intel_gt.h"
9 
10 #include "i915_drv.h"
11 #include "i915_gem_clflush.h"
12 #include "i915_gem_domain.h"
13 #include "i915_gem_gtt.h"
14 #include "i915_gem_ioctls.h"
15 #include "i915_gem_lmem.h"
16 #include "i915_gem_mman.h"
17 #include "i915_gem_object.h"
18 #include "i915_vma.h"
19 
20 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
21 {
22 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
23 
24 	if (IS_DGFX(i915))
25 		return false;
26 
27 	return !(obj->cache_level == I915_CACHE_NONE ||
28 		 obj->cache_level == I915_CACHE_WT);
29 }
30 
31 bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
32 {
33 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
34 
35 	if (obj->cache_dirty)
36 		return false;
37 
38 	if (IS_DGFX(i915))
39 		return false;
40 
41 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
42 		return true;
43 
44 	/* Currently in use by HW (display engine)? Keep flushed. */
45 	return i915_gem_object_is_framebuffer(obj);
46 }
47 
48 static void
49 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
50 {
51 	struct i915_vma *vma;
52 
53 	assert_object_held(obj);
54 
55 	if (!(obj->write_domain & flush_domains))
56 		return;
57 
58 	switch (obj->write_domain) {
59 	case I915_GEM_DOMAIN_GTT:
60 		spin_lock(&obj->vma.lock);
61 		for_each_ggtt_vma(vma, obj) {
62 			if (i915_vma_unset_ggtt_write(vma))
63 				intel_gt_flush_ggtt_writes(vma->vm->gt);
64 		}
65 		spin_unlock(&obj->vma.lock);
66 
67 		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
68 		break;
69 
70 	case I915_GEM_DOMAIN_WC:
71 		wmb();
72 		break;
73 
74 	case I915_GEM_DOMAIN_CPU:
75 		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
76 		break;
77 
78 	case I915_GEM_DOMAIN_RENDER:
79 		if (gpu_write_needs_clflush(obj))
80 			obj->cache_dirty = true;
81 		break;
82 	}
83 
84 	obj->write_domain = 0;
85 }
86 
87 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
88 {
89 	/*
90 	 * We manually flush the CPU domain so that we can override and
91 	 * force the flush for the display, and perform it asyncrhonously.
92 	 */
93 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
94 	if (obj->cache_dirty)
95 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
96 	obj->write_domain = 0;
97 }
98 
99 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
100 {
101 	if (!i915_gem_object_is_framebuffer(obj))
102 		return;
103 
104 	i915_gem_object_lock(obj, NULL);
105 	__i915_gem_object_flush_for_display(obj);
106 	i915_gem_object_unlock(obj);
107 }
108 
/*
 * Variant of i915_gem_object_flush_if_display() that does not take the
 * object lock itself: flushes @obj for the display only if it is a
 * framebuffer.
 */
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	__i915_gem_object_flush_for_display(obj);
}
114 
115 /**
116  * Moves a single object to the WC read, and possibly write domain.
117  * @obj: object to act on
118  * @write: ask for write access or read only
119  *
120  * This function returns when the move is complete, including waiting on
121  * flushes to occur.
122  */
123 int
124 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
125 {
126 	int ret;
127 
128 	assert_object_held(obj);
129 
130 	ret = i915_gem_object_wait(obj,
131 				   I915_WAIT_INTERRUPTIBLE |
132 				   (write ? I915_WAIT_ALL : 0),
133 				   MAX_SCHEDULE_TIMEOUT);
134 	if (ret)
135 		return ret;
136 
137 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
138 		return 0;
139 
140 	/* Flush and acquire obj->pages so that we are coherent through
141 	 * direct access in memory with previous cached writes through
142 	 * shmemfs and that our cache domain tracking remains valid.
143 	 * For example, if the obj->filp was moved to swap without us
144 	 * being notified and releasing the pages, we would mistakenly
145 	 * continue to assume that the obj remained out of the CPU cached
146 	 * domain.
147 	 */
148 	ret = i915_gem_object_pin_pages(obj);
149 	if (ret)
150 		return ret;
151 
152 	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
153 
154 	/* Serialise direct access to this object with the barriers for
155 	 * coherent writes from the GPU, by effectively invalidating the
156 	 * WC domain upon first access.
157 	 */
158 	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
159 		mb();
160 
161 	/* It should now be out of any other write domains, and we can update
162 	 * the domain values for our changes.
163 	 */
164 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
165 	obj->read_domains |= I915_GEM_DOMAIN_WC;
166 	if (write) {
167 		obj->read_domains = I915_GEM_DOMAIN_WC;
168 		obj->write_domain = I915_GEM_DOMAIN_WC;
169 		obj->mm.dirty = true;
170 	}
171 
172 	i915_gem_object_unpin_pages(obj);
173 	return 0;
174 }
175 
176 /**
177  * Moves a single object to the GTT read, and possibly write domain.
178  * @obj: object to act on
179  * @write: ask for write access or read only
180  *
181  * This function returns when the move is complete, including waiting on
182  * flushes to occur.
183  */
184 int
185 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
186 {
187 	int ret;
188 
189 	assert_object_held(obj);
190 
191 	ret = i915_gem_object_wait(obj,
192 				   I915_WAIT_INTERRUPTIBLE |
193 				   (write ? I915_WAIT_ALL : 0),
194 				   MAX_SCHEDULE_TIMEOUT);
195 	if (ret)
196 		return ret;
197 
198 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
199 		return 0;
200 
201 	/* Flush and acquire obj->pages so that we are coherent through
202 	 * direct access in memory with previous cached writes through
203 	 * shmemfs and that our cache domain tracking remains valid.
204 	 * For example, if the obj->filp was moved to swap without us
205 	 * being notified and releasing the pages, we would mistakenly
206 	 * continue to assume that the obj remained out of the CPU cached
207 	 * domain.
208 	 */
209 	ret = i915_gem_object_pin_pages(obj);
210 	if (ret)
211 		return ret;
212 
213 	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
214 
215 	/* Serialise direct access to this object with the barriers for
216 	 * coherent writes from the GPU, by effectively invalidating the
217 	 * GTT domain upon first access.
218 	 */
219 	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
220 		mb();
221 
222 	/* It should now be out of any other write domains, and we can update
223 	 * the domain values for our changes.
224 	 */
225 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
226 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
227 	if (write) {
228 		struct i915_vma *vma;
229 
230 		obj->read_domains = I915_GEM_DOMAIN_GTT;
231 		obj->write_domain = I915_GEM_DOMAIN_GTT;
232 		obj->mm.dirty = true;
233 
234 		spin_lock(&obj->vma.lock);
235 		for_each_ggtt_vma(vma, obj)
236 			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
237 				i915_vma_set_ggtt_write(vma);
238 		spin_unlock(&obj->vma.lock);
239 	}
240 
241 	i915_gem_object_unpin_pages(obj);
242 	return 0;
243 }
244 
245 /**
246  * Changes the cache-level of an object across all VMA.
247  * @obj: object to act on
248  * @cache_level: new cache level to set for the object
249  *
250  * After this function returns, the object will be in the new cache-level
251  * across all GTT and the contents of the backing storage will be coherent,
252  * with respect to the new cache-level. In order to keep the backing storage
253  * coherent for all users, we only allow a single cache level to be set
254  * globally on the object and prevent it from being changed whilst the
255  * hardware is reading from the object. That is if the object is currently
256  * on the scanout it will be set to uncached (or equivalent display
257  * cache coherency) and all non-MOCS GPU access will also be uncached so
258  * that all direct access to the scanout remains coherent.
259  */
260 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
261 				    enum i915_cache_level cache_level)
262 {
263 	int ret;
264 
265 	if (obj->cache_level == cache_level)
266 		return 0;
267 
268 	ret = i915_gem_object_wait(obj,
269 				   I915_WAIT_INTERRUPTIBLE |
270 				   I915_WAIT_ALL,
271 				   MAX_SCHEDULE_TIMEOUT);
272 	if (ret)
273 		return ret;
274 
275 	/* Always invalidate stale cachelines */
276 	if (obj->cache_level != cache_level) {
277 		i915_gem_object_set_cache_coherency(obj, cache_level);
278 		obj->cache_dirty = true;
279 	}
280 
281 	/* The cache-level will be applied when each vma is rebound. */
282 	return i915_gem_object_unbind(obj,
283 				      I915_GEM_OBJECT_UNBIND_ACTIVE |
284 				      I915_GEM_OBJECT_UNBIND_BARRIER);
285 }
286 
287 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
288 			       struct drm_file *file)
289 {
290 	struct drm_i915_gem_caching *args = data;
291 	struct drm_i915_gem_object *obj;
292 	int err = 0;
293 
294 	if (IS_DGFX(to_i915(dev)))
295 		return -ENODEV;
296 
297 	rcu_read_lock();
298 	obj = i915_gem_object_lookup_rcu(file, args->handle);
299 	if (!obj) {
300 		err = -ENOENT;
301 		goto out;
302 	}
303 
304 	switch (obj->cache_level) {
305 	case I915_CACHE_LLC:
306 	case I915_CACHE_L3_LLC:
307 		args->caching = I915_CACHING_CACHED;
308 		break;
309 
310 	case I915_CACHE_WT:
311 		args->caching = I915_CACHING_DISPLAY;
312 		break;
313 
314 	default:
315 		args->caching = I915_CACHING_NONE;
316 		break;
317 	}
318 out:
319 	rcu_read_unlock();
320 	return err;
321 }
322 
323 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
324 			       struct drm_file *file)
325 {
326 	struct drm_i915_private *i915 = to_i915(dev);
327 	struct drm_i915_gem_caching *args = data;
328 	struct drm_i915_gem_object *obj;
329 	enum i915_cache_level level;
330 	int ret = 0;
331 
332 	if (IS_DGFX(i915))
333 		return -ENODEV;
334 
335 	switch (args->caching) {
336 	case I915_CACHING_NONE:
337 		level = I915_CACHE_NONE;
338 		break;
339 	case I915_CACHING_CACHED:
340 		/*
341 		 * Due to a HW issue on BXT A stepping, GPU stores via a
342 		 * snooped mapping may leave stale data in a corresponding CPU
343 		 * cacheline, whereas normally such cachelines would get
344 		 * invalidated.
345 		 */
346 		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
347 			return -ENODEV;
348 
349 		level = I915_CACHE_LLC;
350 		break;
351 	case I915_CACHING_DISPLAY:
352 		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
353 		break;
354 	default:
355 		return -EINVAL;
356 	}
357 
358 	obj = i915_gem_object_lookup(file, args->handle);
359 	if (!obj)
360 		return -ENOENT;
361 
362 	/*
363 	 * The caching mode of proxy object is handled by its generator, and
364 	 * not allowed to be changed by userspace.
365 	 */
366 	if (i915_gem_object_is_proxy(obj)) {
367 		/*
368 		 * Silently allow cached for userptr; the vulkan driver
369 		 * sets all objects to cached
370 		 */
371 		if (!i915_gem_object_is_userptr(obj) ||
372 		    args->caching != I915_CACHING_CACHED)
373 			ret = -ENXIO;
374 
375 		goto out;
376 	}
377 
378 	ret = i915_gem_object_lock_interruptible(obj, NULL);
379 	if (ret)
380 		goto out;
381 
382 	ret = i915_gem_object_set_cache_level(obj, level);
383 	i915_gem_object_unlock(obj);
384 
385 out:
386 	i915_gem_object_put(obj);
387 	return ret;
388 }
389 
390 /*
391  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
392  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
393  * (for pageflips). We only flush the caches while preparing the buffer for
394  * display, the callers are responsible for frontbuffer flush.
395  */
396 struct i915_vma *
397 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
398 				     struct i915_gem_ww_ctx *ww,
399 				     u32 alignment,
400 				     const struct i915_gtt_view *view,
401 				     unsigned int flags)
402 {
403 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
404 	struct i915_vma *vma;
405 	int ret;
406 
407 	/* Frame buffer must be in LMEM */
408 	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
409 		return ERR_PTR(-EINVAL);
410 
411 	/*
412 	 * The display engine is not coherent with the LLC cache on gen6.  As
413 	 * a result, we make sure that the pinning that is about to occur is
414 	 * done with uncached PTEs. This is lowest common denominator for all
415 	 * chipsets.
416 	 *
417 	 * However for gen6+, we could do better by using the GFDT bit instead
418 	 * of uncaching, which would allow us to flush all the LLC-cached data
419 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
420 	 */
421 	ret = i915_gem_object_set_cache_level(obj,
422 					      HAS_WT(i915) ?
423 					      I915_CACHE_WT : I915_CACHE_NONE);
424 	if (ret)
425 		return ERR_PTR(ret);
426 
427 	/*
428 	 * As the user may map the buffer once pinned in the display plane
429 	 * (e.g. libkms for the bootup splash), we have to ensure that we
430 	 * always use map_and_fenceable for all scanout buffers. However,
431 	 * it may simply be too big to fit into mappable, in which case
432 	 * put it anyway and hope that userspace can cope (but always first
433 	 * try to preserve the existing ABI).
434 	 */
435 	vma = ERR_PTR(-ENOSPC);
436 	if ((flags & PIN_MAPPABLE) == 0 &&
437 	    (!view || view->type == I915_GTT_VIEW_NORMAL))
438 		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
439 						  flags | PIN_MAPPABLE |
440 						  PIN_NONBLOCK);
441 	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
442 		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
443 						  alignment, flags);
444 	if (IS_ERR(vma))
445 		return vma;
446 
447 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
448 	i915_vma_mark_scanout(vma);
449 
450 	i915_gem_object_flush_if_display_locked(obj);
451 
452 	return vma;
453 }
454 
455 /**
456  * Moves a single object to the CPU read, and possibly write domain.
457  * @obj: object to act on
458  * @write: requesting write or read-only access
459  *
460  * This function returns when the move is complete, including waiting on
461  * flushes to occur.
462  */
463 int
464 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
465 {
466 	int ret;
467 
468 	assert_object_held(obj);
469 
470 	ret = i915_gem_object_wait(obj,
471 				   I915_WAIT_INTERRUPTIBLE |
472 				   (write ? I915_WAIT_ALL : 0),
473 				   MAX_SCHEDULE_TIMEOUT);
474 	if (ret)
475 		return ret;
476 
477 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
478 
479 	/* Flush the CPU cache if it's still invalid. */
480 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
481 		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
482 		obj->read_domains |= I915_GEM_DOMAIN_CPU;
483 	}
484 
485 	/* It should now be out of any other write domains, and we can update
486 	 * the domain values for our changes.
487 	 */
488 	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
489 
490 	/* If we're writing through the CPU, then the GPU read domains will
491 	 * need to be invalidated at next use.
492 	 */
493 	if (write)
494 		__start_cpu_write(obj);
495 
496 	return 0;
497 }
498 
/**
 * i915_gem_set_domain_ioctl - Prepare an object for CPU access by user space
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 *
 * Return: 0 on success (including when no read domain is requested);
 * -ENODEV on discrete devices; -EINVAL if a GPU domain is requested or if a
 * write domain is given that differs from the read domains; -ENOENT if the
 * handle does not resolve to an object; -ENXIO for proxy objects that are
 * not userptr; otherwise the error from waiting on, validating, locking,
 * pinning or moving the object.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	/* No domain requested at all: succeed without looking up the object. */
	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Userptr objects are handled entirely here (validate, then wait
	 * again) and never reach the proxy check or the domain change below.
	 */
	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	/* WC takes precedence over GTT; anything else goes to the CPU domain. */
	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	/*
	 * Reached via out_unpin/out_unlock only, after the lock is dropped:
	 * a successful request for write access invalidates the frontbuffer.
	 */
	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
626 
627 /*
628  * Pins the specified object's pages and synchronizes the object with
629  * GPU accesses. Sets needs_clflush to non-zero if the caller should
630  * flush the object from the CPU cache.
631  */
632 int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
633 				 unsigned int *needs_clflush)
634 {
635 	int ret;
636 
637 	*needs_clflush = 0;
638 	if (!i915_gem_object_has_struct_page(obj))
639 		return -ENODEV;
640 
641 	assert_object_held(obj);
642 
643 	ret = i915_gem_object_wait(obj,
644 				   I915_WAIT_INTERRUPTIBLE,
645 				   MAX_SCHEDULE_TIMEOUT);
646 	if (ret)
647 		return ret;
648 
649 	ret = i915_gem_object_pin_pages(obj);
650 	if (ret)
651 		return ret;
652 
653 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
654 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
655 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
656 		if (ret)
657 			goto err_unpin;
658 		else
659 			goto out;
660 	}
661 
662 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
663 
664 	/* If we're not in the cpu read domain, set ourself into the gtt
665 	 * read domain and manually flush cachelines (if required). This
666 	 * optimizes for the case when the gpu will dirty the data
667 	 * anyway again before the next pread happens.
668 	 */
669 	if (!obj->cache_dirty &&
670 	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
671 		*needs_clflush = CLFLUSH_BEFORE;
672 
673 out:
674 	/* return with the pages pinned */
675 	return 0;
676 
677 err_unpin:
678 	i915_gem_object_unpin_pages(obj);
679 	return ret;
680 }
681 
682 int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
683 				  unsigned int *needs_clflush)
684 {
685 	int ret;
686 
687 	*needs_clflush = 0;
688 	if (!i915_gem_object_has_struct_page(obj))
689 		return -ENODEV;
690 
691 	assert_object_held(obj);
692 
693 	ret = i915_gem_object_wait(obj,
694 				   I915_WAIT_INTERRUPTIBLE |
695 				   I915_WAIT_ALL,
696 				   MAX_SCHEDULE_TIMEOUT);
697 	if (ret)
698 		return ret;
699 
700 	ret = i915_gem_object_pin_pages(obj);
701 	if (ret)
702 		return ret;
703 
704 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
705 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
706 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
707 		if (ret)
708 			goto err_unpin;
709 		else
710 			goto out;
711 	}
712 
713 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
714 
715 	/* If we're not in the cpu write domain, set ourself into the
716 	 * gtt write domain and manually flush cachelines (as required).
717 	 * This optimizes for the case when the gpu will use the data
718 	 * right away and we therefore have to clflush anyway.
719 	 */
720 	if (!obj->cache_dirty) {
721 		*needs_clflush |= CLFLUSH_AFTER;
722 
723 		/*
724 		 * Same trick applies to invalidate partially written
725 		 * cachelines read before writing.
726 		 */
727 		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
728 			*needs_clflush |= CLFLUSH_BEFORE;
729 	}
730 
731 out:
732 	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
733 	obj->mm.dirty = true;
734 	/* return with the pages pinned */
735 	return 0;
736 
737 err_unpin:
738 	i915_gem_object_unpin_pages(obj);
739 	return ret;
740 }
741