/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

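/*
 * Do GPU (render) writes to this object land in a CPU/LLC cache that may
 * need to be clflushed before a non-coherent agent (e.g. the display)
 * reads it? Never on discrete parts, nor for uncached or write-through
 * objects.
 */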
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here would fall back to
	 * checking whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

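/*
 * Do CPU writes to this object need a clflush before other agents (GPU,
 * display engine) can see them? Never on discrete parts or when the
 * object is already marked cache_dirty; otherwise yes if the mapping is
 * not coherent for CPU writes, or if the object is a framebuffer that
 * must be kept flushed for the display engine.
 */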
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

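/*
 * Flush the CPU-visible side effects of the object's current write domain,
 * provided it is covered by @flush_domains: GGTT write barriers, WC write
 * combining buffers or CPU caches, as appropriate. GPU render writes are
 * only noted as cache_dirty here; the clflush is deferred until needed.
 * Clears obj->write_domain once the flush has been issued.
 */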
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

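/*
 * Flush any pending CPU/GTT writes for an object that is being scanned out
 * so that the display engine sees up-to-date data. No-op for objects that
 * are not framebuffers.
 */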
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 *                                    possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects have an immutable cache
	 * setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

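/*
 * The get-caching ioctl: report the object's current caching mode back to
 * userspace. Not supported on discrete parts, nor for objects whose PAT
 * index was set explicitly by userspace.
 */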
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl is not supported for objects whose pat_index was set
	 * by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

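/*
 * The set-caching ioctl: change the object's caching mode on behalf of
 * userspace. Not supported on discrete parts, for objects whose PAT index
 * was set by userspace, or (with the userptr exception below) for proxy
 * objects.
 */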
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl is not supported for objects whose pat_index was set
	 * by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * cannot be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 *                             object with the CPU, either through the mmap
 *                             ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab the userptr pages; iris uses set_domain to
		 * check userptr validity.
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

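/*
 * As i915_gem_object_prepare_read(), but for a CPU write: pin the pages,
 * wait for all GPU activity and report in *needs_clflush whether the
 * caller must flush cachelines before (CLFLUSH_BEFORE) and/or after
 * (CLFLUSH_AFTER) the write. Returns with the pages pinned on success.
 */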
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}