/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

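/*
 * GPU writes to a cacheable object (i.e. neither uncached nor write-through)
 * may land in CPU caches that the display engine cannot snoop, so such
 * objects must be flagged for a clflush before scanout or other non-coherent
 * access.
 */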
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

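/*
 * Flush any outstanding writes tracked in obj->write_domain, provided the
 * write domain intersects @flush_domains, and then clear the write-domain
 * tracking.
 */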
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
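
/*
 * Illustrative userspace usage (not part of this file, a sketch only):
 * requesting display caching for a buffer via the set-caching ioctl.
 * Structure and flag names come from include/uapi/drm/i915_drm.h; fd,
 * handle and drmIoctl() (libdrm) are assumed to exist in the caller.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_DISPLAY,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		return -errno;
 */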

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
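	/*
	 * If the mappable pin attempt above failed (and we do not need to
	 * back off the ww lock with -EDEADLK), retry without PIN_MAPPABLE |
	 * PIN_NONBLOCK and accept a GGTT offset outside the mappable
	 * aperture.
	 */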
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
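
/*
 * Illustrative userspace usage (not part of this file, a sketch only):
 * moving a buffer into the CPU domain for reading via the set-domain ioctl.
 * Structure and flag names come from include/uapi/drm/i915_drm.h; fd,
 * handle and drmIoctl() (libdrm) are assumed to exist in the caller.
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = 0,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
 *		return -errno;
 */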

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, ask the caller to clflush
	 * the cachelines before reading (CLFLUSH_BEFORE) instead of moving
	 * the object into the cpu read domain here. This optimizes for the
	 * case when the gpu will dirty the data anyway again before the
	 * next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

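/*
 * As i915_gem_object_prepare_read, but also waits for outstanding GPU writes
 * and reports via needs_clflush whether the caller must flush the CPU cache
 * before (CLFLUSH_BEFORE) and/or after (CLFLUSH_AFTER) the CPU write.
 */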
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, ask the caller to clflush
	 * the cachelines after writing (CLFLUSH_AFTER) instead of moving
	 * the object into the cpu write domain here. This optimizes for
	 * the case when the gpu will use the data right away and we
	 * therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}