/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

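/*
 * Does a GPU (render) write risk leaving stale data in the CPU caches, so
 * that the object must be flagged cache_dirty when the RENDER write domain
 * is flushed? Never the case on discrete parts, nor for uncached or
 * write-through objects.
 */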
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such objects is managed
	 * by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

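/*
 * Flush any pending CPU-visible writes tracked in obj->write_domain, if that
 * domain is covered by the flush_domains mask: GGTT writes are flushed
 * through intel_gt_flush_ggtt_writes(), WC writes are ordered with a wmb(),
 * CPU-domain writes are clflushed, and GPU render writes may mark the CPU
 * cache dirty so it gets flushed before the next CPU access. Requires the
 * object lock.
 */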
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

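/*
 * The two helpers below flush an object that is in use as a framebuffer so
 * that the display engine sees coherent data. The first takes the object
 * lock itself; the _locked variant expects the caller to already hold it.
 * Both are no-ops for objects that are not framebuffers.
 */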
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 *                                    possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
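
/*
 * Illustrative caller pattern for the domain helpers above (a sketch only,
 * not lifted from this file; assumes the caller holds a reference and can
 * take the object lock):
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_gtt_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * i915_gem_object_set_to_wc_domain() and i915_gem_object_set_to_cpu_domain()
 * follow the same locking rule, as enforced by assert_object_held().
 */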

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

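/*
 * DRM_IOCTL_I915_GEM_GET_CACHING: report the current caching mode of a GEM
 * object back to userspace. Not available on discrete GPUs, and rejected for
 * objects whose PAT index was set directly by userspace.
 */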
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

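/*
 * DRM_IOCTL_I915_GEM_SET_CACHING: let userspace pick a caching mode for a GEM
 * object. Not supported on discrete GPUs or on graphics version 12.70+
 * platforms, and rejected for objects whose PAT index was set by userspace.
 */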
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the Vulkan driver
		 * sets all objects to cached.
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}
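
/*
 * Illustrative use of the helper above (a sketch only; assumes a ww
 * transaction set up with for_i915_gem_ww(), and is not taken from the real
 * display callers, which live in the framebuffer pinning code):
 *
 *	for_i915_gem_ww(&ww, err, true) {
 *		err = i915_gem_object_lock(obj, &ww);
 *		if (err)
 *			continue;
 *
 *		vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *							   NULL, 0);
 *		if (IS_ERR(vma))
 *			err = PTR_ERR(vma);
 *	}
 */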

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 *                                     and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 *                             object with the CPU, either through the mmap
 *                             ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab the userptr pages; iris uses set_domain to
		 * check userptr validity.
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
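
/*
 * Illustrative userspace invocation of the set-domain ioctl (a sketch only,
 * using the uapi declared in include/uapi/drm/i915_drm.h; not part of this
 * file):
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */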

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the CPU read domain, set ourselves into the GTT
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the GPU will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

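/*
 * Like i915_gem_object_prepare_read(), but for CPU writes: pins the pages,
 * waits for all GPU access (reads and writes) to finish, and sets
 * needs_clflush bits telling the caller whether to flush cachelines before
 * and/or after writing. Also marks the pages dirty and invalidates the
 * frontbuffer.
 */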
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the CPU write domain, set ourselves into the
	 * GTT write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the GPU will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
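
/*
 * Illustrative flow for the prepare helpers above (a sketch only, not lifted
 * from an actual caller; the real users are the pread/pwrite style paths):
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (!err) {
 *		... write into the object's pages, flushing cachelines
 *		... (e.g. with drm_clflush_virt_range()) as directed by the
 *		... CLFLUSH_BEFORE/CLFLUSH_AFTER bits in needs_clflush ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *	i915_gem_object_unlock(obj);
 */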