xref: /openbmc/linux/drivers/gpu/drm/i915/i915_gem.c (revision 4fc4dca8)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41 
42 #include "gt/intel_engine_pm.h"
43 #include "gt/intel_gt_pm.h"
44 #include "gt/intel_mocs.h"
45 #include "gt/intel_reset.h"
46 #include "gt/intel_workarounds.h"
47 
48 #include "i915_drv.h"
49 #include "i915_gem_clflush.h"
50 #include "i915_gemfs.h"
51 #include "i915_gem_pm.h"
52 #include "i915_trace.h"
53 #include "i915_vgpu.h"
54 
55 #include "intel_display.h"
56 #include "intel_drv.h"
57 #include "intel_frontbuffer.h"
58 #include "intel_pm.h"
59 
60 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
61 
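/*
 * Report whether CPU writes to this object need a clflush before the
 * hardware can observe them: already-dirty objects are flushed later,
 * write-coherent objects never need it, and globally pinned objects are
 * kept flushed while in use by the hardware.
 */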
62 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
63 {
64 	if (obj->cache_dirty)
65 		return false;
66 
67 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
68 		return true;
69 
70 	return obj->pin_global; /* currently in use by HW, keep flushed */
71 }
72 
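/*
 * Reserve a node of the requested size in the CPU-mappable range of the
 * global GTT, preferring low addresses. Used below as a temporary
 * aperture window for the GTT pread/pwrite fallback paths.
 */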
73 static int
74 insert_mappable_node(struct i915_ggtt *ggtt,
75                      struct drm_mm_node *node, u32 size)
76 {
77 	memset(node, 0, sizeof(*node));
78 	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
79 					   size, 0, I915_COLOR_UNEVICTABLE,
80 					   0, ggtt->mappable_end,
81 					   DRM_MM_INSERT_LOW);
82 }
83 
84 static void
85 remove_mappable_node(struct drm_mm_node *node)
86 {
87 	drm_mm_remove_node(node);
88 }
89 
90 /* some bookkeeping */
91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
92 				  u64 size)
93 {
94 	spin_lock(&dev_priv->mm.object_stat_lock);
95 	dev_priv->mm.object_count++;
96 	dev_priv->mm.object_memory += size;
97 	spin_unlock(&dev_priv->mm.object_stat_lock);
98 }
99 
100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
101 				     u64 size)
102 {
103 	spin_lock(&dev_priv->mm.object_stat_lock);
104 	dev_priv->mm.object_count--;
105 	dev_priv->mm.object_memory -= size;
106 	spin_unlock(&dev_priv->mm.object_stat_lock);
107 }
108 
109 int
110 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
111 			    struct drm_file *file)
112 {
113 	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
114 	struct drm_i915_gem_get_aperture *args = data;
115 	struct i915_vma *vma;
116 	u64 pinned;
117 
118 	mutex_lock(&ggtt->vm.mutex);
119 
120 	pinned = ggtt->vm.reserved;
121 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
122 		if (i915_vma_is_pinned(vma))
123 			pinned += vma->node.size;
124 
125 	mutex_unlock(&ggtt->vm.mutex);
126 
127 	args->aper_size = ggtt->vm.total;
128 	args->aper_available_size = args->aper_size - pinned;
129 
130 	return 0;
131 }
132 
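/*
 * ->get_pages() for the phys backend (i915_gem_phys_ops): copy the
 * object's shmem pages into a single contiguous DMA allocation and point
 * a one-entry sg_table at it.
 */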
133 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
134 {
135 	struct address_space *mapping = obj->base.filp->f_mapping;
136 	drm_dma_handle_t *phys;
137 	struct sg_table *st;
138 	struct scatterlist *sg;
139 	char *vaddr;
140 	int i;
141 	int err;
142 
143 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
144 		return -EINVAL;
145 
146 	/* Always aligning to the object size allows a single allocation
147 	 * to handle all possible callers, and given typical object sizes,
148 	 * the alignment of the buddy allocation will naturally match.
149 	 */
150 	phys = drm_pci_alloc(obj->base.dev,
151 			     roundup_pow_of_two(obj->base.size),
152 			     roundup_pow_of_two(obj->base.size));
153 	if (!phys)
154 		return -ENOMEM;
155 
156 	vaddr = phys->vaddr;
157 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
158 		struct page *page;
159 		char *src;
160 
161 		page = shmem_read_mapping_page(mapping, i);
162 		if (IS_ERR(page)) {
163 			err = PTR_ERR(page);
164 			goto err_phys;
165 		}
166 
167 		src = kmap_atomic(page);
168 		memcpy(vaddr, src, PAGE_SIZE);
169 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
170 		kunmap_atomic(src);
171 
172 		put_page(page);
173 		vaddr += PAGE_SIZE;
174 	}
175 
176 	i915_gem_chipset_flush(to_i915(obj->base.dev));
177 
178 	st = kmalloc(sizeof(*st), GFP_KERNEL);
179 	if (!st) {
180 		err = -ENOMEM;
181 		goto err_phys;
182 	}
183 
184 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
185 		kfree(st);
186 		err = -ENOMEM;
187 		goto err_phys;
188 	}
189 
190 	sg = st->sgl;
191 	sg->offset = 0;
192 	sg->length = obj->base.size;
193 
194 	sg_dma_address(sg) = phys->busaddr;
195 	sg_dma_len(sg) = obj->base.size;
196 
197 	obj->phys_handle = phys;
198 
199 	__i915_gem_object_set_pages(obj, st, sg->length);
200 
201 	return 0;
202 
203 err_phys:
204 	drm_pci_free(obj->base.dev, phys);
205 
206 	return err;
207 }
208 
209 static void __start_cpu_write(struct drm_i915_gem_object *obj)
210 {
211 	obj->read_domains = I915_GEM_DOMAIN_CPU;
212 	obj->write_domain = I915_GEM_DOMAIN_CPU;
213 	if (cpu_write_needs_clflush(obj))
214 		obj->cache_dirty = true;
215 }
216 
217 void
218 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
219 				struct sg_table *pages,
220 				bool needs_clflush)
221 {
222 	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
223 
224 	if (obj->mm.madv == I915_MADV_DONTNEED)
225 		obj->mm.dirty = false;
226 
227 	if (needs_clflush &&
228 	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
229 	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
230 		drm_clflush_sg(pages);
231 
232 	__start_cpu_write(obj);
233 }
234 
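/*
 * ->put_pages() for the phys backend: if the object is dirty, write the
 * contiguous copy back to the shmem pages, then release the sg_table and
 * the DMA allocation.
 */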
235 static void
236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
237 			       struct sg_table *pages)
238 {
239 	__i915_gem_object_release_shmem(obj, pages, false);
240 
241 	if (obj->mm.dirty) {
242 		struct address_space *mapping = obj->base.filp->f_mapping;
243 		char *vaddr = obj->phys_handle->vaddr;
244 		int i;
245 
246 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
247 			struct page *page;
248 			char *dst;
249 
250 			page = shmem_read_mapping_page(mapping, i);
251 			if (IS_ERR(page))
252 				continue;
253 
254 			dst = kmap_atomic(page);
255 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
256 			memcpy(dst, vaddr, PAGE_SIZE);
257 			kunmap_atomic(dst);
258 
259 			set_page_dirty(page);
260 			if (obj->mm.madv == I915_MADV_WILLNEED)
261 				mark_page_accessed(page);
262 			put_page(page);
263 			vaddr += PAGE_SIZE;
264 		}
265 		obj->mm.dirty = false;
266 	}
267 
268 	sg_free_table(pages);
269 	kfree(pages);
270 
271 	drm_pci_free(obj->base.dev, obj->phys_handle);
272 }
273 
274 static void
275 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
276 {
277 	i915_gem_object_unpin_pages(obj);
278 }
279 
280 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
281 	.get_pages = i915_gem_object_get_pages_phys,
282 	.put_pages = i915_gem_object_put_pages_phys,
283 	.release = i915_gem_object_release_phys,
284 };
285 
286 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
287 
288 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
289 {
290 	struct i915_vma *vma;
291 	LIST_HEAD(still_in_list);
292 	int ret;
293 
294 	lockdep_assert_held(&obj->base.dev->struct_mutex);
295 
296 	/* Closed vmas are removed from the obj->vma.list, but they may
297 	 * still have an active binding on the object. To remove those we
298 	 * must wait for all rendering to the object to complete (as unbinding
299 	 * must anyway), and retire the requests.
300 	 */
301 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
302 	if (ret)
303 		return ret;
304 
305 	spin_lock(&obj->vma.lock);
306 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
307 						       struct i915_vma,
308 						       obj_link))) {
309 		list_move_tail(&vma->obj_link, &still_in_list);
310 		spin_unlock(&obj->vma.lock);
311 
312 		ret = i915_vma_unbind(vma);
313 
314 		spin_lock(&obj->vma.lock);
315 	}
316 	list_splice(&still_in_list, &obj->vma.list);
317 	spin_unlock(&obj->vma.lock);
318 
319 	return ret;
320 }
321 
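/*
 * Wait for a single fence to signal, special-casing i915 requests so
 * that completed requests can be retired when called with
 * I915_WAIT_LOCKED.
 */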
322 static long
323 i915_gem_object_wait_fence(struct dma_fence *fence,
324 			   unsigned int flags,
325 			   long timeout)
326 {
327 	struct i915_request *rq;
328 
329 	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
330 
331 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
332 		return timeout;
333 
334 	if (!dma_fence_is_i915(fence))
335 		return dma_fence_wait_timeout(fence,
336 					      flags & I915_WAIT_INTERRUPTIBLE,
337 					      timeout);
338 
339 	rq = to_request(fence);
340 	if (i915_request_completed(rq))
341 		goto out;
342 
343 	timeout = i915_request_wait(rq, flags, timeout);
344 
345 out:
346 	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
347 		i915_request_retire_upto(rq);
348 
349 	return timeout;
350 }
351 
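/*
 * Wait for the fences tracked by a reservation object: all shared fences
 * plus the exclusive fence when I915_WAIT_ALL is set, otherwise just the
 * exclusive fence. If everything has signaled and no new fences were
 * added in the meantime, the fence array is opportunistically pruned.
 */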
352 static long
353 i915_gem_object_wait_reservation(struct reservation_object *resv,
354 				 unsigned int flags,
355 				 long timeout)
356 {
357 	unsigned int seq = __read_seqcount_begin(&resv->seq);
358 	struct dma_fence *excl;
359 	bool prune_fences = false;
360 
361 	if (flags & I915_WAIT_ALL) {
362 		struct dma_fence **shared;
363 		unsigned int count, i;
364 		int ret;
365 
366 		ret = reservation_object_get_fences_rcu(resv,
367 							&excl, &count, &shared);
368 		if (ret)
369 			return ret;
370 
371 		for (i = 0; i < count; i++) {
372 			timeout = i915_gem_object_wait_fence(shared[i],
373 							     flags, timeout);
374 			if (timeout < 0)
375 				break;
376 
377 			dma_fence_put(shared[i]);
378 		}
379 
380 		for (; i < count; i++)
381 			dma_fence_put(shared[i]);
382 		kfree(shared);
383 
384 		/*
385 		 * If both shared fences and an exclusive fence exist,
386 		 * then by construction the shared fences must be later
387 		 * than the exclusive fence. If we successfully wait for
388 		 * all the shared fences, we know that the exclusive fence
389 		 * must also be signaled. If all the shared fences are
390 		 * signaled, we can prune the array and recover the
391 		 * floating references on the fences/requests.
392 		 */
393 		prune_fences = count && timeout >= 0;
394 	} else {
395 		excl = reservation_object_get_excl_rcu(resv);
396 	}
397 
398 	if (excl && timeout >= 0)
399 		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
400 
401 	dma_fence_put(excl);
402 
403 	/*
404 	 * Opportunistically prune the fences iff we know they have *all* been
405 	 * signaled and that the reservation object has not been changed (i.e.
406 	 * no new fences have been added).
407 	 */
408 	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
409 		if (reservation_object_trylock(resv)) {
410 			if (!__read_seqcount_retry(&resv->seq, seq))
411 				reservation_object_add_excl_fence(resv, NULL);
412 			reservation_object_unlock(resv);
413 		}
414 	}
415 
416 	return timeout;
417 }
418 
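/*
 * Propagate a scheduling priority change to the i915 request backing a
 * fence, provided the engine supports rescheduling.
 */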
419 static void __fence_set_priority(struct dma_fence *fence,
420 				 const struct i915_sched_attr *attr)
421 {
422 	struct i915_request *rq;
423 	struct intel_engine_cs *engine;
424 
425 	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
426 		return;
427 
428 	rq = to_request(fence);
429 	engine = rq->engine;
430 
431 	local_bh_disable();
432 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
433 	if (engine->schedule)
434 		engine->schedule(rq, attr);
435 	rcu_read_unlock();
436 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
437 }
438 
439 static void fence_set_priority(struct dma_fence *fence,
440 			       const struct i915_sched_attr *attr)
441 {
442 	/* Recurse once into a fence-array */
443 	if (dma_fence_is_array(fence)) {
444 		struct dma_fence_array *array = to_dma_fence_array(fence);
445 		int i;
446 
447 		for (i = 0; i < array->num_fences; i++)
448 			__fence_set_priority(array->fences[i], attr);
449 	} else {
450 		__fence_set_priority(fence, attr);
451 	}
452 }
453 
454 int
455 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
456 			      unsigned int flags,
457 			      const struct i915_sched_attr *attr)
458 {
459 	struct dma_fence *excl;
460 
461 	if (flags & I915_WAIT_ALL) {
462 		struct dma_fence **shared;
463 		unsigned int count, i;
464 		int ret;
465 
466 		ret = reservation_object_get_fences_rcu(obj->resv,
467 							&excl, &count, &shared);
468 		if (ret)
469 			return ret;
470 
471 		for (i = 0; i < count; i++) {
472 			fence_set_priority(shared[i], attr);
473 			dma_fence_put(shared[i]);
474 		}
475 
476 		kfree(shared);
477 	} else {
478 		excl = reservation_object_get_excl_rcu(obj->resv);
479 	}
480 
481 	if (excl) {
482 		fence_set_priority(excl, attr);
483 		dma_fence_put(excl);
484 	}
485 	return 0;
486 }
487 
488 /**
489  * Waits for rendering to the object to be completed
490  * @obj: i915 gem object
491  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
492  * @timeout: how long to wait
493  */
494 int
495 i915_gem_object_wait(struct drm_i915_gem_object *obj,
496 		     unsigned int flags,
497 		     long timeout)
498 {
499 	might_sleep();
500 	GEM_BUG_ON(timeout < 0);
501 
502 	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
503 	return timeout < 0 ? timeout : 0;
504 }
505 
506 static int
507 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
508 		     struct drm_i915_gem_pwrite *args,
509 		     struct drm_file *file)
510 {
511 	void *vaddr = obj->phys_handle->vaddr + args->offset;
512 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
513 
514 	/* We manually control the domain here and pretend that it
515 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
516 	 */
517 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
518 	if (copy_from_user(vaddr, user_data, args->size))
519 		return -EFAULT;
520 
521 	drm_clflush_virt_range(vaddr, args->size);
522 	i915_gem_chipset_flush(to_i915(obj->base.dev));
523 
524 	intel_fb_obj_flush(obj, ORIGIN_CPU);
525 	return 0;
526 }
527 
528 static int
529 i915_gem_create(struct drm_file *file,
530 		struct drm_i915_private *dev_priv,
531 		u64 *size_p,
532 		u32 *handle_p)
533 {
534 	struct drm_i915_gem_object *obj;
535 	u32 handle;
536 	u64 size;
537 	int ret;
538 
539 	size = round_up(*size_p, PAGE_SIZE);
540 	if (size == 0)
541 		return -EINVAL;
542 
543 	/* Allocate the new object */
544 	obj = i915_gem_object_create(dev_priv, size);
545 	if (IS_ERR(obj))
546 		return PTR_ERR(obj);
547 
548 	ret = drm_gem_handle_create(file, &obj->base, &handle);
549 	/* drop reference from allocate - handle holds it now */
550 	i915_gem_object_put(obj);
551 	if (ret)
552 		return ret;
553 
554 	*handle_p = handle;
555 	*size_p = size;
556 	return 0;
557 }
558 
559 int
560 i915_gem_dumb_create(struct drm_file *file,
561 		     struct drm_device *dev,
562 		     struct drm_mode_create_dumb *args)
563 {
564 	int cpp = DIV_ROUND_UP(args->bpp, 8);
565 	u32 format;
566 
567 	switch (cpp) {
568 	case 1:
569 		format = DRM_FORMAT_C8;
570 		break;
571 	case 2:
572 		format = DRM_FORMAT_RGB565;
573 		break;
574 	case 4:
575 		format = DRM_FORMAT_XRGB8888;
576 		break;
577 	default:
578 		return -EINVAL;
579 	}
580 
581 	/* have to work out size/pitch and return them */
582 	args->pitch = ALIGN(args->width * cpp, 64);
583 
584 	/* align stride to page size so that we can remap */
585 	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
586 						    DRM_FORMAT_MOD_LINEAR))
587 		args->pitch = ALIGN(args->pitch, 4096);
588 
589 	args->size = args->pitch * args->height;
590 	return i915_gem_create(file, to_i915(dev),
591 			       &args->size, &args->handle);
592 }
593 
594 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
595 {
596 	return !(obj->cache_level == I915_CACHE_NONE ||
597 		 obj->cache_level == I915_CACHE_WT);
598 }
599 
600 /**
601  * Creates a new mm object and returns a handle to it.
602  * @dev: drm device pointer
603  * @data: ioctl data blob
604  * @file: drm file pointer
605  */
606 int
607 i915_gem_create_ioctl(struct drm_device *dev, void *data,
608 		      struct drm_file *file)
609 {
610 	struct drm_i915_private *dev_priv = to_i915(dev);
611 	struct drm_i915_gem_create *args = data;
612 
613 	i915_gem_flush_free_objects(dev_priv);
614 
615 	return i915_gem_create(file, dev_priv,
616 			       &args->size, &args->handle);
617 }
618 
619 static inline enum fb_op_origin
620 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
621 {
622 	return (domain == I915_GEM_DOMAIN_GTT ?
623 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
624 }
625 
626 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
627 {
628 	intel_wakeref_t wakeref;
629 
630 	/*
631 	 * No actual flushing is required for the GTT write domain for reads
632 	 * from the GTT domain. Writes to it "immediately" go to main memory
633 	 * as far as we know, so there's no chipset flush. It also doesn't
634 	 * land in the GPU render cache.
635 	 *
636 	 * However, we do have to enforce the order so that all writes through
637 	 * the GTT land before any writes to the device, such as updates to
638 	 * the GATT itself.
639 	 *
640 	 * We also have to wait a bit for the writes to land from the GTT.
641 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
642 	 * timing. This issue has only been observed when switching quickly
643 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
644 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
645 	 * system agents we could not reproduce this behaviour until Cannonlake,
646 	 * that is!).
647 	 */
648 
649 	wmb();
650 
651 	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
652 		return;
653 
654 	i915_gem_chipset_flush(dev_priv);
655 
656 	with_intel_runtime_pm(dev_priv, wakeref) {
657 		spin_lock_irq(&dev_priv->uncore.lock);
658 
659 		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
660 
661 		spin_unlock_irq(&dev_priv->uncore.lock);
662 	}
663 }
664 
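/*
 * Flush any writes pending in obj->write_domain, if it intersects
 * flush_domains, so that they are visible outside that domain, then
 * clear the object's write domain.
 */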
665 static void
666 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
667 {
668 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
669 	struct i915_vma *vma;
670 
671 	if (!(obj->write_domain & flush_domains))
672 		return;
673 
674 	switch (obj->write_domain) {
675 	case I915_GEM_DOMAIN_GTT:
676 		i915_gem_flush_ggtt_writes(dev_priv);
677 
678 		intel_fb_obj_flush(obj,
679 				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
680 
681 		for_each_ggtt_vma(vma, obj) {
682 			if (vma->iomap)
683 				continue;
684 
685 			i915_vma_unset_ggtt_write(vma);
686 		}
687 		break;
688 
689 	case I915_GEM_DOMAIN_WC:
690 		wmb();
691 		break;
692 
693 	case I915_GEM_DOMAIN_CPU:
694 		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
695 		break;
696 
697 	case I915_GEM_DOMAIN_RENDER:
698 		if (gpu_write_needs_clflush(obj))
699 			obj->cache_dirty = true;
700 		break;
701 	}
702 
703 	obj->write_domain = 0;
704 }
705 
706 /*
707  * Pins the specified object's pages and synchronizes the object with
708  * GPU accesses. Sets needs_clflush to non-zero if the caller should
709  * flush the object from the CPU cache.
710  */
711 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
712 				    unsigned int *needs_clflush)
713 {
714 	int ret;
715 
716 	lockdep_assert_held(&obj->base.dev->struct_mutex);
717 
718 	*needs_clflush = 0;
719 	if (!i915_gem_object_has_struct_page(obj))
720 		return -ENODEV;
721 
722 	ret = i915_gem_object_wait(obj,
723 				   I915_WAIT_INTERRUPTIBLE |
724 				   I915_WAIT_LOCKED,
725 				   MAX_SCHEDULE_TIMEOUT);
726 	if (ret)
727 		return ret;
728 
729 	ret = i915_gem_object_pin_pages(obj);
730 	if (ret)
731 		return ret;
732 
733 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
734 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
735 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
736 		if (ret)
737 			goto err_unpin;
738 		else
739 			goto out;
740 	}
741 
742 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
743 
744 	/* If we're not in the cpu read domain, set ourselves into the gtt
745 	 * read domain and manually flush cachelines (if required). This
746 	 * optimizes for the case when the gpu will dirty the data
747 	 * anyway again before the next pread happens.
748 	 */
749 	if (!obj->cache_dirty &&
750 	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
751 		*needs_clflush = CLFLUSH_BEFORE;
752 
753 out:
754 	/* return with the pages pinned */
755 	return 0;
756 
757 err_unpin:
758 	i915_gem_object_unpin_pages(obj);
759 	return ret;
760 }
761 
762 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
763 				     unsigned int *needs_clflush)
764 {
765 	int ret;
766 
767 	lockdep_assert_held(&obj->base.dev->struct_mutex);
768 
769 	*needs_clflush = 0;
770 	if (!i915_gem_object_has_struct_page(obj))
771 		return -ENODEV;
772 
773 	ret = i915_gem_object_wait(obj,
774 				   I915_WAIT_INTERRUPTIBLE |
775 				   I915_WAIT_LOCKED |
776 				   I915_WAIT_ALL,
777 				   MAX_SCHEDULE_TIMEOUT);
778 	if (ret)
779 		return ret;
780 
781 	ret = i915_gem_object_pin_pages(obj);
782 	if (ret)
783 		return ret;
784 
785 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
786 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
787 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
788 		if (ret)
789 			goto err_unpin;
790 		else
791 			goto out;
792 	}
793 
794 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
795 
796 	/* If we're not in the cpu write domain, set ourselves into the
797 	 * gtt write domain and manually flush cachelines (as required).
798 	 * This optimizes for the case when the gpu will use the data
799 	 * right away and we therefore have to clflush anyway.
800 	 */
801 	if (!obj->cache_dirty) {
802 		*needs_clflush |= CLFLUSH_AFTER;
803 
804 		/*
805 		 * Same trick applies to invalidate partially written
806 		 * cachelines read before writing.
807 		 */
808 		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
809 			*needs_clflush |= CLFLUSH_BEFORE;
810 	}
811 
812 out:
813 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
814 	obj->mm.dirty = true;
815 	/* return with the pages pinned */
816 	return 0;
817 
818 err_unpin:
819 	i915_gem_object_unpin_pages(obj);
820 	return ret;
821 }
822 
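/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before copying out of the page if
 * needs_clflush is set.
 */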
823 static int
824 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
825 	    bool needs_clflush)
826 {
827 	char *vaddr;
828 	int ret;
829 
830 	vaddr = kmap(page);
831 
832 	if (needs_clflush)
833 		drm_clflush_virt_range(vaddr + offset, len);
834 
835 	ret = __copy_to_user(user_data, vaddr + offset, len);
836 
837 	kunmap(page);
838 
839 	return ret ? -EFAULT : 0;
840 }
841 
842 static int
843 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
844 		     struct drm_i915_gem_pread *args)
845 {
846 	char __user *user_data;
847 	u64 remain;
848 	unsigned int needs_clflush;
849 	unsigned int idx, offset;
850 	int ret;
851 
852 	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
853 	if (ret)
854 		return ret;
855 
856 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
857 	mutex_unlock(&obj->base.dev->struct_mutex);
858 	if (ret)
859 		return ret;
860 
861 	remain = args->size;
862 	user_data = u64_to_user_ptr(args->data_ptr);
863 	offset = offset_in_page(args->offset);
864 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
865 		struct page *page = i915_gem_object_get_page(obj, idx);
866 		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
867 
868 		ret = shmem_pread(page, offset, length, user_data,
869 				  needs_clflush);
870 		if (ret)
871 			break;
872 
873 		remain -= length;
874 		user_data += length;
875 		offset = 0;
876 	}
877 
878 	i915_gem_obj_finish_shmem_access(obj);
879 	return ret;
880 }
881 
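/*
 * Copy data out of the GGTT aperture to userspace via an atomic WC
 * mapping, falling back to a regular (sleeping) mapping if the user
 * buffer faults. Returns true if the copy could not complete.
 */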
882 static inline bool
883 gtt_user_read(struct io_mapping *mapping,
884 	      loff_t base, int offset,
885 	      char __user *user_data, int length)
886 {
887 	void __iomem *vaddr;
888 	unsigned long unwritten;
889 
890 	/* We can use the cpu mem copy function because this is X86. */
891 	vaddr = io_mapping_map_atomic_wc(mapping, base);
892 	unwritten = __copy_to_user_inatomic(user_data,
893 					    (void __force *)vaddr + offset,
894 					    length);
895 	io_mapping_unmap_atomic(vaddr);
896 	if (unwritten) {
897 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
898 		unwritten = copy_to_user(user_data,
899 					 (void __force *)vaddr + offset,
900 					 length);
901 		io_mapping_unmap(vaddr);
902 	}
903 	return unwritten;
904 }
905 
906 static int
907 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
908 		   const struct drm_i915_gem_pread *args)
909 {
910 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
911 	struct i915_ggtt *ggtt = &i915->ggtt;
912 	intel_wakeref_t wakeref;
913 	struct drm_mm_node node;
914 	struct i915_vma *vma;
915 	void __user *user_data;
916 	u64 remain, offset;
917 	int ret;
918 
919 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
920 	if (ret)
921 		return ret;
922 
923 	wakeref = intel_runtime_pm_get(i915);
924 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
925 				       PIN_MAPPABLE |
926 				       PIN_NONFAULT |
927 				       PIN_NONBLOCK);
928 	if (!IS_ERR(vma)) {
929 		node.start = i915_ggtt_offset(vma);
930 		node.allocated = false;
931 		ret = i915_vma_put_fence(vma);
932 		if (ret) {
933 			i915_vma_unpin(vma);
934 			vma = ERR_PTR(ret);
935 		}
936 	}
937 	if (IS_ERR(vma)) {
938 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
939 		if (ret)
940 			goto out_unlock;
941 		GEM_BUG_ON(!node.allocated);
942 	}
943 
944 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
945 	if (ret)
946 		goto out_unpin;
947 
948 	mutex_unlock(&i915->drm.struct_mutex);
949 
950 	user_data = u64_to_user_ptr(args->data_ptr);
951 	remain = args->size;
952 	offset = args->offset;
953 
954 	while (remain > 0) {
955 		/* Operation in this page
956 		 *
957 		 * page_base = page offset within aperture
958 		 * page_offset = offset within page
959 		 * page_length = bytes to copy for this page
960 		 */
961 		u32 page_base = node.start;
962 		unsigned page_offset = offset_in_page(offset);
963 		unsigned page_length = PAGE_SIZE - page_offset;
964 		page_length = remain < page_length ? remain : page_length;
965 		if (node.allocated) {
966 			wmb();
967 			ggtt->vm.insert_page(&ggtt->vm,
968 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
969 					     node.start, I915_CACHE_NONE, 0);
970 			wmb();
971 		} else {
972 			page_base += offset & PAGE_MASK;
973 		}
974 
975 		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
976 				  user_data, page_length)) {
977 			ret = -EFAULT;
978 			break;
979 		}
980 
981 		remain -= page_length;
982 		user_data += page_length;
983 		offset += page_length;
984 	}
985 
986 	mutex_lock(&i915->drm.struct_mutex);
987 out_unpin:
988 	if (node.allocated) {
989 		wmb();
990 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
991 		remove_mappable_node(&node);
992 	} else {
993 		i915_vma_unpin(vma);
994 	}
995 out_unlock:
996 	intel_runtime_pm_put(i915, wakeref);
997 	mutex_unlock(&i915->drm.struct_mutex);
998 
999 	return ret;
1000 }
1001 
1002 /**
1003  * Reads data from the object referenced by handle.
1004  * @dev: drm device pointer
1005  * @data: ioctl data blob
1006  * @file: drm file pointer
1007  *
1008  * On error, the contents of *data are undefined.
1009  */
1010 int
1011 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1012 		     struct drm_file *file)
1013 {
1014 	struct drm_i915_gem_pread *args = data;
1015 	struct drm_i915_gem_object *obj;
1016 	int ret;
1017 
1018 	if (args->size == 0)
1019 		return 0;
1020 
1021 	if (!access_ok(u64_to_user_ptr(args->data_ptr),
1022 		       args->size))
1023 		return -EFAULT;
1024 
1025 	obj = i915_gem_object_lookup(file, args->handle);
1026 	if (!obj)
1027 		return -ENOENT;
1028 
1029 	/* Bounds check source.  */
1030 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1031 		ret = -EINVAL;
1032 		goto out;
1033 	}
1034 
1035 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1036 
1037 	ret = i915_gem_object_wait(obj,
1038 				   I915_WAIT_INTERRUPTIBLE,
1039 				   MAX_SCHEDULE_TIMEOUT);
1040 	if (ret)
1041 		goto out;
1042 
1043 	ret = i915_gem_object_pin_pages(obj);
1044 	if (ret)
1045 		goto out;
1046 
1047 	ret = i915_gem_shmem_pread(obj, args);
1048 	if (ret == -EFAULT || ret == -ENODEV)
1049 		ret = i915_gem_gtt_pread(obj, args);
1050 
1051 	i915_gem_object_unpin_pages(obj);
1052 out:
1053 	i915_gem_object_put(obj);
1054 	return ret;
1055 }
1056 
1057 /* This is the fast write path which cannot handle
1058  * page faults in the source data
1059  */
1060 
1061 static inline bool
1062 ggtt_write(struct io_mapping *mapping,
1063 	   loff_t base, int offset,
1064 	   char __user *user_data, int length)
1065 {
1066 	void __iomem *vaddr;
1067 	unsigned long unwritten;
1068 
1069 	/* We can use the cpu mem copy function because this is X86. */
1070 	vaddr = io_mapping_map_atomic_wc(mapping, base);
1071 	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1072 						      user_data, length);
1073 	io_mapping_unmap_atomic(vaddr);
1074 	if (unwritten) {
1075 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1076 		unwritten = copy_from_user((void __force *)vaddr + offset,
1077 					   user_data, length);
1078 		io_mapping_unmap(vaddr);
1079 	}
1080 
1081 	return unwritten;
1082 }
1083 
1084 /**
1085  * This is the fast pwrite path, where we copy the data directly from the
1086  * user into the GTT, uncached.
1087  * @obj: i915 GEM object
1088  * @args: pwrite arguments structure
1089  */
1090 static int
1091 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1092 			 const struct drm_i915_gem_pwrite *args)
1093 {
1094 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1095 	struct i915_ggtt *ggtt = &i915->ggtt;
1096 	intel_wakeref_t wakeref;
1097 	struct drm_mm_node node;
1098 	struct i915_vma *vma;
1099 	u64 remain, offset;
1100 	void __user *user_data;
1101 	int ret;
1102 
1103 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1104 	if (ret)
1105 		return ret;
1106 
1107 	if (i915_gem_object_has_struct_page(obj)) {
1108 		/*
1109 		 * Avoid waking the device up if we can fall back, as
1110 		 * waking/resuming is very slow (worst-case 10-100 ms
1111 		 * depending on PCI sleeps and our own resume time).
1112 		 * This easily dwarfs any performance advantage from
1113 		 * using the cache bypass of indirect GGTT access.
1114 		 */
1115 		wakeref = intel_runtime_pm_get_if_in_use(i915);
1116 		if (!wakeref) {
1117 			ret = -EFAULT;
1118 			goto out_unlock;
1119 		}
1120 	} else {
1121 		/* No backing pages, no fallback, we must force GGTT access */
1122 		wakeref = intel_runtime_pm_get(i915);
1123 	}
1124 
1125 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1126 				       PIN_MAPPABLE |
1127 				       PIN_NONFAULT |
1128 				       PIN_NONBLOCK);
1129 	if (!IS_ERR(vma)) {
1130 		node.start = i915_ggtt_offset(vma);
1131 		node.allocated = false;
1132 		ret = i915_vma_put_fence(vma);
1133 		if (ret) {
1134 			i915_vma_unpin(vma);
1135 			vma = ERR_PTR(ret);
1136 		}
1137 	}
1138 	if (IS_ERR(vma)) {
1139 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1140 		if (ret)
1141 			goto out_rpm;
1142 		GEM_BUG_ON(!node.allocated);
1143 	}
1144 
1145 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1146 	if (ret)
1147 		goto out_unpin;
1148 
1149 	mutex_unlock(&i915->drm.struct_mutex);
1150 
1151 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1152 
1153 	user_data = u64_to_user_ptr(args->data_ptr);
1154 	offset = args->offset;
1155 	remain = args->size;
1156 	while (remain) {
1157 		/* Operation in this page
1158 		 *
1159 		 * page_base = page offset within aperture
1160 		 * page_offset = offset within page
1161 		 * page_length = bytes to copy for this page
1162 		 */
1163 		u32 page_base = node.start;
1164 		unsigned int page_offset = offset_in_page(offset);
1165 		unsigned int page_length = PAGE_SIZE - page_offset;
1166 		page_length = remain < page_length ? remain : page_length;
1167 		if (node.allocated) {
1168 			wmb(); /* flush the write before we modify the GGTT */
1169 			ggtt->vm.insert_page(&ggtt->vm,
1170 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1171 					     node.start, I915_CACHE_NONE, 0);
1172 			wmb(); /* flush modifications to the GGTT (insert_page) */
1173 		} else {
1174 			page_base += offset & PAGE_MASK;
1175 		}
1176 		/* If we get a fault while copying data, then (presumably) our
1177 		 * source page isn't available.  Return the error and we'll
1178 		 * retry in the slow path.
1179 		 * If the object is non-shmem backed, we retry with the
1180 		 * path that handles page faults.
1181 		 */
1182 		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1183 			       user_data, page_length)) {
1184 			ret = -EFAULT;
1185 			break;
1186 		}
1187 
1188 		remain -= page_length;
1189 		user_data += page_length;
1190 		offset += page_length;
1191 	}
1192 	intel_fb_obj_flush(obj, ORIGIN_CPU);
1193 
1194 	mutex_lock(&i915->drm.struct_mutex);
1195 out_unpin:
1196 	if (node.allocated) {
1197 		wmb();
1198 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1199 		remove_mappable_node(&node);
1200 	} else {
1201 		i915_vma_unpin(vma);
1202 	}
1203 out_rpm:
1204 	intel_runtime_pm_put(i915, wakeref);
1205 out_unlock:
1206 	mutex_unlock(&i915->drm.struct_mutex);
1207 	return ret;
1208 }
1209 
1210 /* Per-page copy function for the shmem pwrite fastpath.
1211  * Flushes invalid cachelines before writing to the target if
1212  * needs_clflush_before is set and flushes out any written cachelines after
1213  * writing if needs_clflush is set.
1214  */
1215 static int
1216 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1217 	     bool needs_clflush_before,
1218 	     bool needs_clflush_after)
1219 {
1220 	char *vaddr;
1221 	int ret;
1222 
1223 	vaddr = kmap(page);
1224 
1225 	if (needs_clflush_before)
1226 		drm_clflush_virt_range(vaddr + offset, len);
1227 
1228 	ret = __copy_from_user(vaddr + offset, user_data, len);
1229 	if (!ret && needs_clflush_after)
1230 		drm_clflush_virt_range(vaddr + offset, len);
1231 
1232 	kunmap(page);
1233 
1234 	return ret ? -EFAULT : 0;
1235 }
1236 
1237 static int
1238 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1239 		      const struct drm_i915_gem_pwrite *args)
1240 {
1241 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1242 	void __user *user_data;
1243 	u64 remain;
1244 	unsigned int partial_cacheline_write;
1245 	unsigned int needs_clflush;
1246 	unsigned int offset, idx;
1247 	int ret;
1248 
1249 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1250 	if (ret)
1251 		return ret;
1252 
1253 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1254 	mutex_unlock(&i915->drm.struct_mutex);
1255 	if (ret)
1256 		return ret;
1257 
1258 	/* If we don't overwrite a cacheline completely we need to be
1259 	 * careful to have up-to-date data by first clflushing. Don't
1260 	 * overcomplicate things and flush the entire patch.
1261 	 */
1262 	partial_cacheline_write = 0;
1263 	if (needs_clflush & CLFLUSH_BEFORE)
1264 		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1265 
1266 	user_data = u64_to_user_ptr(args->data_ptr);
1267 	remain = args->size;
1268 	offset = offset_in_page(args->offset);
1269 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1270 		struct page *page = i915_gem_object_get_page(obj, idx);
1271 		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1272 
1273 		ret = shmem_pwrite(page, offset, length, user_data,
1274 				   (offset | length) & partial_cacheline_write,
1275 				   needs_clflush & CLFLUSH_AFTER);
1276 		if (ret)
1277 			break;
1278 
1279 		remain -= length;
1280 		user_data += length;
1281 		offset = 0;
1282 	}
1283 
1284 	intel_fb_obj_flush(obj, ORIGIN_CPU);
1285 	i915_gem_obj_finish_shmem_access(obj);
1286 	return ret;
1287 }
1288 
1289 /**
1290  * Writes data to the object referenced by handle.
1291  * @dev: drm device
1292  * @data: ioctl data blob
1293  * @file: drm file
1294  *
1295  * On error, the contents of the buffer that were to be modified are undefined.
1296  */
1297 int
1298 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1299 		      struct drm_file *file)
1300 {
1301 	struct drm_i915_gem_pwrite *args = data;
1302 	struct drm_i915_gem_object *obj;
1303 	int ret;
1304 
1305 	if (args->size == 0)
1306 		return 0;
1307 
1308 	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1309 		return -EFAULT;
1310 
1311 	obj = i915_gem_object_lookup(file, args->handle);
1312 	if (!obj)
1313 		return -ENOENT;
1314 
1315 	/* Bounds check destination. */
1316 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1317 		ret = -EINVAL;
1318 		goto err;
1319 	}
1320 
1321 	/* Writes not allowed into this read-only object */
1322 	if (i915_gem_object_is_readonly(obj)) {
1323 		ret = -EINVAL;
1324 		goto err;
1325 	}
1326 
1327 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1328 
1329 	ret = -ENODEV;
1330 	if (obj->ops->pwrite)
1331 		ret = obj->ops->pwrite(obj, args);
1332 	if (ret != -ENODEV)
1333 		goto err;
1334 
1335 	ret = i915_gem_object_wait(obj,
1336 				   I915_WAIT_INTERRUPTIBLE |
1337 				   I915_WAIT_ALL,
1338 				   MAX_SCHEDULE_TIMEOUT);
1339 	if (ret)
1340 		goto err;
1341 
1342 	ret = i915_gem_object_pin_pages(obj);
1343 	if (ret)
1344 		goto err;
1345 
1346 	ret = -EFAULT;
1347 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1348 	 * it would end up going through the fenced access, and we'll get
1349 	 * different detiling behavior between reading and writing.
1350 	 * pread/pwrite currently are reading and writing from the CPU
1351 	 * perspective, requiring manual detiling by the client.
1352 	 */
1353 	if (!i915_gem_object_has_struct_page(obj) ||
1354 	    cpu_write_needs_clflush(obj))
1355 		/* Note that the gtt paths might fail with non-page-backed user
1356 		 * pointers (e.g. gtt mappings when moving data between
1357 		 * textures). Fallback to the shmem path in that case.
1358 		 */
1359 		ret = i915_gem_gtt_pwrite_fast(obj, args);
1360 
1361 	if (ret == -EFAULT || ret == -ENOSPC) {
1362 		if (obj->phys_handle)
1363 			ret = i915_gem_phys_pwrite(obj, args, file);
1364 		else
1365 			ret = i915_gem_shmem_pwrite(obj, args);
1366 	}
1367 
1368 	i915_gem_object_unpin_pages(obj);
1369 err:
1370 	i915_gem_object_put(obj);
1371 	return ret;
1372 }
1373 
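/*
 * Move the object's bound GGTT vmas, and the object itself, to the tails
 * of their respective lists so they count as most recently used.
 */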
1374 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1375 {
1376 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1377 	struct list_head *list;
1378 	struct i915_vma *vma;
1379 
1380 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1381 
1382 	mutex_lock(&i915->ggtt.vm.mutex);
1383 	for_each_ggtt_vma(vma, obj) {
1384 		if (!drm_mm_node_allocated(&vma->node))
1385 			continue;
1386 
1387 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1388 	}
1389 	mutex_unlock(&i915->ggtt.vm.mutex);
1390 
1391 	spin_lock(&i915->mm.obj_lock);
1392 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1393 	list_move_tail(&obj->mm.link, list);
1394 	spin_unlock(&i915->mm.obj_lock);
1395 }
1396 
1397 /**
1398  * Called when user space prepares to use an object with the CPU, either
1399  * through the mmap ioctl's mapping or a GTT mapping.
1400  * @dev: drm device
1401  * @data: ioctl data blob
1402  * @file: drm file
1403  */
1404 int
1405 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1406 			  struct drm_file *file)
1407 {
1408 	struct drm_i915_gem_set_domain *args = data;
1409 	struct drm_i915_gem_object *obj;
1410 	u32 read_domains = args->read_domains;
1411 	u32 write_domain = args->write_domain;
1412 	int err;
1413 
1414 	/* Only handle setting domains to types used by the CPU. */
1415 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1416 		return -EINVAL;
1417 
1418 	/*
1419 	 * Having something in the write domain implies it's in the read
1420 	 * domain, and only that read domain.  Enforce that in the request.
1421 	 */
1422 	if (write_domain && read_domains != write_domain)
1423 		return -EINVAL;
1424 
1425 	if (!read_domains)
1426 		return 0;
1427 
1428 	obj = i915_gem_object_lookup(file, args->handle);
1429 	if (!obj)
1430 		return -ENOENT;
1431 
1432 	/*
1433 	 * Already in the desired write domain? Nothing for us to do!
1434 	 *
1435 	 * We apply a little bit of cunning here to catch a broader set of
1436 	 * no-ops. If obj->write_domain is set, we must be in the same
1437 	 * obj->read_domains, and only that domain. Therefore, if that
1438 	 * obj->write_domain matches the request read_domains, we are
1439 	 * already in the same read/write domain and can skip the operation,
1440 	 * without having to further check the requested write_domain.
1441 	 */
1442 	if (READ_ONCE(obj->write_domain) == read_domains) {
1443 		err = 0;
1444 		goto out;
1445 	}
1446 
1447 	/*
1448 	 * Try to flush the object off the GPU without holding the lock.
1449 	 * We will repeat the flush holding the lock in the normal manner
1450 	 * to catch cases where we are gazumped.
1451 	 */
1452 	err = i915_gem_object_wait(obj,
1453 				   I915_WAIT_INTERRUPTIBLE |
1454 				   I915_WAIT_PRIORITY |
1455 				   (write_domain ? I915_WAIT_ALL : 0),
1456 				   MAX_SCHEDULE_TIMEOUT);
1457 	if (err)
1458 		goto out;
1459 
1460 	/*
1461 	 * Proxy objects do not control access to the backing storage, ergo
1462 	 * they cannot be used as a means to manipulate the cache domain
1463 	 * tracking for that backing storage. The proxy object is always
1464 	 * considered to be outside of any cache domain.
1465 	 */
1466 	if (i915_gem_object_is_proxy(obj)) {
1467 		err = -ENXIO;
1468 		goto out;
1469 	}
1470 
1471 	/*
1472 	 * Flush and acquire obj->pages so that we are coherent through
1473 	 * direct access in memory with previous cached writes through
1474 	 * shmemfs and that our cache domain tracking remains valid.
1475 	 * For example, if the obj->filp was moved to swap without us
1476 	 * being notified and releasing the pages, we would mistakenly
1477 	 * continue to assume that the obj remained out of the CPU cached
1478 	 * domain.
1479 	 */
1480 	err = i915_gem_object_pin_pages(obj);
1481 	if (err)
1482 		goto out;
1483 
1484 	err = i915_mutex_lock_interruptible(dev);
1485 	if (err)
1486 		goto out_unpin;
1487 
1488 	if (read_domains & I915_GEM_DOMAIN_WC)
1489 		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1490 	else if (read_domains & I915_GEM_DOMAIN_GTT)
1491 		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1492 	else
1493 		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1494 
1495 	/* And bump the LRU for this access */
1496 	i915_gem_object_bump_inactive_ggtt(obj);
1497 
1498 	mutex_unlock(&dev->struct_mutex);
1499 
1500 	if (write_domain != 0)
1501 		intel_fb_obj_invalidate(obj,
1502 					fb_write_origin(obj, write_domain));
1503 
1504 out_unpin:
1505 	i915_gem_object_unpin_pages(obj);
1506 out:
1507 	i915_gem_object_put(obj);
1508 	return err;
1509 }
1510 
1511 /**
1512  * Called when user space has done writes to this buffer
1513  * @dev: drm device
1514  * @data: ioctl data blob
1515  * @file: drm file
1516  */
1517 int
1518 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1519 			 struct drm_file *file)
1520 {
1521 	struct drm_i915_gem_sw_finish *args = data;
1522 	struct drm_i915_gem_object *obj;
1523 
1524 	obj = i915_gem_object_lookup(file, args->handle);
1525 	if (!obj)
1526 		return -ENOENT;
1527 
1528 	/*
1529 	 * Proxy objects are barred from CPU access, so there is no
1530 	 * need to ban sw_finish as it is a nop.
1531 	 */
1532 
1533 	/* Pinned buffers may be scanout, so flush the cache */
1534 	i915_gem_object_flush_if_display(obj);
1535 	i915_gem_object_put(obj);
1536 
1537 	return 0;
1538 }
1539 
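/*
 * Check that the vma looked up after vm_mmap() still describes the
 * mapping we created (same file, start and length) before we adjust its
 * page protection.
 */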
1540 static inline bool
1541 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1542 	      unsigned long addr, unsigned long size)
1543 {
1544 	if (vma->vm_file != filp)
1545 		return false;
1546 
1547 	return vma->vm_start == addr &&
1548 	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1549 }
1550 
1551 /**
1552  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1553  *			 it is mapped to.
1554  * @dev: drm device
1555  * @data: ioctl data blob
1556  * @file: drm file
1557  *
1558  * While the mapping holds a reference on the contents of the object, it doesn't
1559  * imply a ref on the object itself.
1560  *
1561  * IMPORTANT:
1562  *
1563  * DRM driver writers who look at this function as an example for how to do GEM
1564  * mmap support, please don't implement mmap support like this. The modern way
1565  * to implement DRM mmap support is with an mmap offset ioctl (like
1566  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1567  * That way debug tooling like valgrind will understand what's going on, hiding
1568  * the mmap call in a driver private ioctl will break that. The i915 driver only
1569  * does cpu mmaps this way because we didn't know better.
1570  */
1571 int
1572 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1573 		    struct drm_file *file)
1574 {
1575 	struct drm_i915_gem_mmap *args = data;
1576 	struct drm_i915_gem_object *obj;
1577 	unsigned long addr;
1578 
1579 	if (args->flags & ~(I915_MMAP_WC))
1580 		return -EINVAL;
1581 
1582 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1583 		return -ENODEV;
1584 
1585 	obj = i915_gem_object_lookup(file, args->handle);
1586 	if (!obj)
1587 		return -ENOENT;
1588 
1589 	/* prime objects have no backing filp to GEM mmap
1590 	 * pages from.
1591 	 */
1592 	if (!obj->base.filp) {
1593 		addr = -ENXIO;
1594 		goto err;
1595 	}
1596 
1597 	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1598 		addr = -EINVAL;
1599 		goto err;
1600 	}
1601 
1602 	addr = vm_mmap(obj->base.filp, 0, args->size,
1603 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1604 		       args->offset);
1605 	if (IS_ERR_VALUE(addr))
1606 		goto err;
1607 
1608 	if (args->flags & I915_MMAP_WC) {
1609 		struct mm_struct *mm = current->mm;
1610 		struct vm_area_struct *vma;
1611 
1612 		if (down_write_killable(&mm->mmap_sem)) {
1613 			addr = -EINTR;
1614 			goto err;
1615 		}
1616 		vma = find_vma(mm, addr);
1617 		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1618 			vma->vm_page_prot =
1619 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1620 		else
1621 			addr = -ENOMEM;
1622 		up_write(&mm->mmap_sem);
1623 		if (IS_ERR_VALUE(addr))
1624 			goto err;
1625 
1626 		/* This may race, but that's ok, it only gets set */
1627 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1628 	}
1629 	i915_gem_object_put(obj);
1630 
1631 	args->addr_ptr = (u64)addr;
1632 	return 0;
1633 
1634 err:
1635 	i915_gem_object_put(obj);
1636 	return addr;
1637 }
1638 
1639 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1640 {
1641 	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1642 }
1643 
1644 /**
1645  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1646  *
1647  * A history of the GTT mmap interface:
1648  *
1649  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1650  *     be aligned and suitable for fencing, and still fit into the available
1651  *     mappable space left by the pinned display objects. A classic problem
1652  *     we called the page-fault-of-doom where we would ping-pong between
1653  *     two objects that could not fit inside the GTT and so the memcpy
1654  *     would page one object in at the expense of the other between every
1655  *     single byte.
1656  *
1657  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1658  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1659  *     object is too large for the available space (or simply too large
1660  *     for the mappable aperture!), a view is created instead and faulted
1661  *     into userspace. (This view is aligned and sized appropriately for
1662  *     fenced access.)
1663  *
1664  * 2 - Recognise WC as a separate cache domain so that we can flush the
1665  *     delayed writes via GTT before performing direct access via WC.
1666  *
1667  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1668  *     pagefault; swapin remains transparent.
1669  *
1670  * Restrictions:
1671  *
1672  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1673  *    hangs on some architectures, corruption on others. An attempt to service
1674  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1675  *
1676  *  * the object must be able to fit into RAM (physical memory, though not
1677  *    limited to the mappable aperture).
1678  *
1679  *
1680  * Caveats:
1681  *
1682  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1683  *    all data to system memory. Subsequent access will not be synchronized.
1684  *
1685  *  * all mappings are revoked on runtime device suspend.
1686  *
1687  *  * there are only 8, 16 or 32 fence registers to share between all users
1688  *    (older machines require a fence register for display and blitter access
1689  *    as well). Contention of the fence registers will cause the previous users
1690  *    to be unmapped and any new access will generate new page faults.
1691  *
1692  *  * running out of memory while servicing a fault may generate a SIGBUS,
1693  *    rather than the expected SIGSEGV.
1694  */
1695 int i915_gem_mmap_gtt_version(void)
1696 {
1697 	return 3;
1698 }
1699 
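/*
 * Build a partial GGTT view of roughly chunk pages around the faulting
 * page offset, rounded to whole tile rows for tiled objects. If the
 * chunk would cover the entire object, a normal view is used instead.
 */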
1700 static inline struct i915_ggtt_view
1701 compute_partial_view(const struct drm_i915_gem_object *obj,
1702 		     pgoff_t page_offset,
1703 		     unsigned int chunk)
1704 {
1705 	struct i915_ggtt_view view;
1706 
1707 	if (i915_gem_object_is_tiled(obj))
1708 		chunk = roundup(chunk, tile_row_pages(obj));
1709 
1710 	view.type = I915_GGTT_VIEW_PARTIAL;
1711 	view.partial.offset = rounddown(page_offset, chunk);
1712 	view.partial.size =
1713 		min_t(unsigned int, chunk,
1714 		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1715 
1716 	/* If the partial covers the entire object, just create a normal VMA. */
1717 	if (chunk >= obj->base.size >> PAGE_SHIFT)
1718 		view.type = I915_GGTT_VIEW_NORMAL;
1719 
1720 	return view;
1721 }
1722 
1723 /**
1724  * i915_gem_fault - fault a page into the GTT
1725  * @vmf: fault info
1726  *
1727  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1728  * from userspace.  The fault handler takes care of binding the object to
1729  * the GTT (if needed), allocating and programming a fence register (again,
1730  * only if needed based on whether the old reg is still valid or the object
1731  * is tiled) and inserting a new PTE into the faulting process.
1732  *
1733  * Note that the faulting process may involve evicting existing objects
1734  * from the GTT and/or fence registers to make room.  So performance may
1735  * suffer if the GTT working set is large or there are few fence registers
1736  * left.
1737  *
1738  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1739  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1740  */
1741 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1742 {
1743 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1744 	struct vm_area_struct *area = vmf->vma;
1745 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1746 	struct drm_device *dev = obj->base.dev;
1747 	struct drm_i915_private *dev_priv = to_i915(dev);
1748 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1749 	bool write = area->vm_flags & VM_WRITE;
1750 	intel_wakeref_t wakeref;
1751 	struct i915_vma *vma;
1752 	pgoff_t page_offset;
1753 	int srcu;
1754 	int ret;
1755 
1756 	/* Sanity check that we allow writing into this object */
1757 	if (i915_gem_object_is_readonly(obj) && write)
1758 		return VM_FAULT_SIGBUS;
1759 
1760 	/* We don't use vmf->pgoff since that has the fake offset */
1761 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1762 
1763 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1764 
1765 	ret = i915_gem_object_pin_pages(obj);
1766 	if (ret)
1767 		goto err;
1768 
1769 	wakeref = intel_runtime_pm_get(dev_priv);
1770 
1771 	srcu = i915_reset_trylock(dev_priv);
1772 	if (srcu < 0) {
1773 		ret = srcu;
1774 		goto err_rpm;
1775 	}
1776 
1777 	ret = i915_mutex_lock_interruptible(dev);
1778 	if (ret)
1779 		goto err_reset;
1780 
1781 	/* Access to snoopable pages through the GTT is incoherent. */
1782 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1783 		ret = -EFAULT;
1784 		goto err_unlock;
1785 	}
1786 
1787 	/* Now pin it into the GTT as needed */
1788 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1789 				       PIN_MAPPABLE |
1790 				       PIN_NONBLOCK |
1791 				       PIN_NONFAULT);
1792 	if (IS_ERR(vma)) {
1793 		/* Use a partial view if it is bigger than available space */
1794 		struct i915_ggtt_view view =
1795 			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1796 		unsigned int flags;
1797 
1798 		flags = PIN_MAPPABLE;
1799 		if (view.type == I915_GGTT_VIEW_NORMAL)
1800 			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1801 
1802 		/*
1803 		 * Userspace is now writing through an untracked VMA, abandon
1804 		 * all hope that the hardware is able to track future writes.
1805 		 */
1806 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1807 
1808 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1809 		if (IS_ERR(vma) && !view.type) {
1810 			flags = PIN_MAPPABLE;
1811 			view.type = I915_GGTT_VIEW_PARTIAL;
1812 			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1813 		}
1814 	}
1815 	if (IS_ERR(vma)) {
1816 		ret = PTR_ERR(vma);
1817 		goto err_unlock;
1818 	}
1819 
1820 	ret = i915_vma_pin_fence(vma);
1821 	if (ret)
1822 		goto err_unpin;
1823 
1824 	/* Finally, remap it using the new GTT offset */
1825 	ret = remap_io_mapping(area,
1826 			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1827 			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1828 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
1829 			       &ggtt->iomap);
1830 	if (ret)
1831 		goto err_fence;
1832 
1833 	/* Mark as being mmapped into userspace for later revocation */
1834 	assert_rpm_wakelock_held(dev_priv);
1835 	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1836 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1837 	GEM_BUG_ON(!obj->userfault_count);
1838 
1839 	i915_vma_set_ggtt_write(vma);
1840 
1841 err_fence:
1842 	i915_vma_unpin_fence(vma);
1843 err_unpin:
1844 	__i915_vma_unpin(vma);
1845 err_unlock:
1846 	mutex_unlock(&dev->struct_mutex);
1847 err_reset:
1848 	i915_reset_unlock(dev_priv, srcu);
1849 err_rpm:
1850 	intel_runtime_pm_put(dev_priv, wakeref);
1851 	i915_gem_object_unpin_pages(obj);
1852 err:
1853 	switch (ret) {
1854 	case -EIO:
1855 		/*
1856 		 * We eat errors when the gpu is terminally wedged to avoid
1857 		 * userspace unduly crashing (gl has no provisions for mmaps to
1858 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1859 		 * and so needs to be reported.
1860 		 */
1861 		if (!i915_terminally_wedged(dev_priv))
1862 			return VM_FAULT_SIGBUS;
1863 		/* else: fall through */
1864 	case -EAGAIN:
1865 		/*
1866 		 * EAGAIN means the gpu is hung and we'll wait for the error
1867 		 * handler to reset everything when re-faulting in
1868 		 * i915_mutex_lock_interruptible.
1869 		 */
1870 	case 0:
1871 	case -ERESTARTSYS:
1872 	case -EINTR:
1873 	case -EBUSY:
1874 		/*
1875 		 * EBUSY is ok: this just means that another thread
1876 		 * already did the job.
1877 		 */
1878 		return VM_FAULT_NOPAGE;
1879 	case -ENOMEM:
1880 		return VM_FAULT_OOM;
1881 	case -ENOSPC:
1882 	case -EFAULT:
1883 		return VM_FAULT_SIGBUS;
1884 	default:
1885 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1886 		return VM_FAULT_SIGBUS;
1887 	}
1888 }
1889 
1890 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1891 {
1892 	struct i915_vma *vma;
1893 
1894 	GEM_BUG_ON(!obj->userfault_count);
1895 
1896 	obj->userfault_count = 0;
1897 	list_del(&obj->userfault_link);
1898 	drm_vma_node_unmap(&obj->base.vma_node,
1899 			   obj->base.dev->anon_inode->i_mapping);
1900 
1901 	for_each_ggtt_vma(vma, obj)
1902 		i915_vma_unset_userfault(vma);
1903 }
1904 
1905 /**
1906  * i915_gem_release_mmap - remove physical page mappings
1907  * @obj: obj in question
1908  *
1909  * Preserve the reservation of the mmapping with the DRM core code, but
1910  * relinquish ownership of the pages back to the system.
1911  *
1912  * It is vital that we remove the page mapping if we have mapped a tiled
1913  * object through the GTT and then lose the fence register due to
1914  * resource pressure. Similarly if the object has been moved out of the
1915  * aperture, then pages mapped into userspace must be revoked. Removing the
1916  * mapping will then trigger a page fault on the next user access, allowing
1917  * fixup by i915_gem_fault().
1918  */
1919 void
1920 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1921 {
1922 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1923 	intel_wakeref_t wakeref;
1924 
1925 	/* Serialisation between user GTT access and our code depends upon
1926 	 * revoking the CPU's PTE whilst the mutex is held. The next user
1927 	 * pagefault then has to wait until we release the mutex.
1928 	 *
1929 	 * Note that RPM complicates this somewhat by adding the additional
1930 	 * requirement that operations on the GGTT be made while holding the
1931 	 * RPM wakeref.
1932 	 */
1933 	lockdep_assert_held(&i915->drm.struct_mutex);
1934 	wakeref = intel_runtime_pm_get(i915);
1935 
1936 	if (!obj->userfault_count)
1937 		goto out;
1938 
1939 	__i915_gem_object_release_mmap(obj);
1940 
1941 	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
1942 	 * memory transactions from userspace before we return. The TLB
1943 	 * flushing implied by changing the PTEs above *should* be
1944 	 * sufficient; an extra barrier here just provides us with a bit
1945 	 * of paranoid documentation about our requirement to serialise
1946 	 * memory writes before touching registers / GSM.
1947 	 */
1948 	wmb();
1949 
1950 out:
1951 	intel_runtime_pm_put(i915, wakeref);
1952 }
1953 
1954 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1955 {
1956 	struct drm_i915_gem_object *obj, *on;
1957 	int i;
1958 
1959 	/*
1960 	 * Only called during RPM suspend. All users of the userfault_list
1961 	 * must be holding an RPM wakeref to ensure that this can not
1962 	 * run concurrently with themselves (and use the struct_mutex for
1963 	 * protection between themselves).
1964 	 */
1965 
1966 	list_for_each_entry_safe(obj, on,
1967 				 &dev_priv->mm.userfault_list, userfault_link)
1968 		__i915_gem_object_release_mmap(obj);
1969 
1970 	/* The fences will be lost when the device powers down. If any were
1971 	 * in use by hardware (i.e. they are pinned), we should not be powering
1972 	 * down! All other fences will be reacquired by the user upon waking.
1973 	 */
1974 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
1975 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1976 
1977 		/* Ideally we want to assert that the fence register is not
1978 		 * live at this point (i.e. that no piece of code will be
1979 		 * trying to write through fence + GTT, as that not only violates
1980 		 * our tracking of activity and associated locking/barriers,
1981 		 * but is also illegal given that the hw is powered down).
1982 		 *
1983 		 * Previously we used reg->pin_count as a "liveness" indicator.
1984 		 * That is not sufficient, and we need a more fine-grained
1985 		 * tool if we want to have a sanity check here.
1986 		 */
1987 
1988 		if (!reg->vma)
1989 			continue;
1990 
1991 		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1992 		reg->dirty = true;
1993 	}
1994 }
1995 
1996 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1997 {
1998 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1999 	int err;
2000 
2001 	err = drm_gem_create_mmap_offset(&obj->base);
2002 	if (likely(!err))
2003 		return 0;
2004 
2005 	/* Attempt to reap some mmap space from dead objects */
2006 	do {
2007 		err = i915_gem_wait_for_idle(dev_priv,
2008 					     I915_WAIT_INTERRUPTIBLE,
2009 					     MAX_SCHEDULE_TIMEOUT);
2010 		if (err)
2011 			break;
2012 
2013 		i915_gem_drain_freed_objects(dev_priv);
2014 		err = drm_gem_create_mmap_offset(&obj->base);
2015 		if (!err)
2016 			break;
2017 
2018 	} while (flush_delayed_work(&dev_priv->gem.retire_work));
2019 
2020 	return err;
2021 }
2022 
2023 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2024 {
2025 	drm_gem_free_mmap_offset(&obj->base);
2026 }
2027 
2028 int
2029 i915_gem_mmap_gtt(struct drm_file *file,
2030 		  struct drm_device *dev,
2031 		  u32 handle,
2032 		  u64 *offset)
2033 {
2034 	struct drm_i915_gem_object *obj;
2035 	int ret;
2036 
2037 	obj = i915_gem_object_lookup(file, handle);
2038 	if (!obj)
2039 		return -ENOENT;
2040 
2041 	ret = i915_gem_object_create_mmap_offset(obj);
2042 	if (ret == 0)
2043 		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2044 
2045 	i915_gem_object_put(obj);
2046 	return ret;
2047 }
2048 
2049 /**
2050  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2051  * @dev: DRM device
2052  * @data: GTT mapping ioctl data
2053  * @file: GEM object info
2054  *
2055  * Simply returns the fake offset to userspace so it can mmap it.
2056  * The mmap call will end up in drm_gem_mmap(), which will set things
2057  * up so we can get faults in the handler above.
2058  *
2059  * The fault handler will take care of binding the object into the GTT
2060  * (since it may have been evicted to make room for something), allocating
2061  * a fence register, and mapping the appropriate aperture address into
2062  * userspace.
2063  */
2064 int
2065 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2066 			struct drm_file *file)
2067 {
2068 	struct drm_i915_gem_mmap_gtt *args = data;
2069 
2070 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2071 }
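
/*
 * Illustrative userspace sketch (assumed typical usage, not copied from a
 * real client): the usual sequence for consuming the fake offset returned
 * by the ioctl above. Object creation and error handling are elided.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * Touching @ptr then faults into i915_gem_fault() above, which binds the
 * object into the mappable aperture and inserts the PTEs.
 */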
2072 
2073 /* Immediately discard the backing storage */
2074 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2075 {
2076 	i915_gem_object_free_mmap_offset(obj);
2077 
2078 	if (obj->base.filp == NULL)
2079 		return;
2080 
2081 	/* Our goal here is to return as much of the memory as possible
2082 	 * back to the system, as we are called from OOM.
2083 	 * To do this we must instruct the shmfs to drop all of its
2084 	 * backing pages, *now*.
2085 	 */
2086 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2087 	obj->mm.madv = __I915_MADV_PURGED;
2088 	obj->mm.pages = ERR_PTR(-EFAULT);
2089 }
2090 
2091 /*
2092  * Move pages to the appropriate lru and release the pagevec, decrementing the
2093  * ref count of those pages.
2094  */
2095 static void check_release_pagevec(struct pagevec *pvec)
2096 {
2097 	check_move_unevictable_pages(pvec);
2098 	__pagevec_release(pvec);
2099 	cond_resched();
2100 }
2101 
2102 static void
2103 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2104 			      struct sg_table *pages)
2105 {
2106 	struct sgt_iter sgt_iter;
2107 	struct pagevec pvec;
2108 	struct page *page;
2109 
2110 	__i915_gem_object_release_shmem(obj, pages, true);
2111 	i915_gem_gtt_finish_pages(obj, pages);
2112 
2113 	if (i915_gem_object_needs_bit17_swizzle(obj))
2114 		i915_gem_object_save_bit_17_swizzle(obj, pages);
2115 
2116 	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2117 
2118 	pagevec_init(&pvec);
2119 	for_each_sgt_page(page, sgt_iter, pages) {
2120 		if (obj->mm.dirty)
2121 			set_page_dirty(page);
2122 
2123 		if (obj->mm.madv == I915_MADV_WILLNEED)
2124 			mark_page_accessed(page);
2125 
2126 		if (!pagevec_add(&pvec, page))
2127 			check_release_pagevec(&pvec);
2128 	}
2129 	if (pagevec_count(&pvec))
2130 		check_release_pagevec(&pvec);
2131 	obj->mm.dirty = false;
2132 
2133 	sg_free_table(pages);
2134 	kfree(pages);
2135 }
2136 
2137 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2138 {
2139 	struct radix_tree_iter iter;
2140 	void __rcu **slot;
2141 
2142 	rcu_read_lock();
2143 	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2144 		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2145 	rcu_read_unlock();
2146 }
2147 
2148 static struct sg_table *
2149 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2150 {
2151 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
2152 	struct sg_table *pages;
2153 
2154 	pages = fetch_and_zero(&obj->mm.pages);
2155 	if (IS_ERR_OR_NULL(pages))
2156 		return pages;
2157 
2158 	spin_lock(&i915->mm.obj_lock);
2159 	list_del(&obj->mm.link);
2160 	spin_unlock(&i915->mm.obj_lock);
2161 
2162 	if (obj->mm.mapping) {
2163 		void *ptr;
2164 
2165 		ptr = page_mask_bits(obj->mm.mapping);
2166 		if (is_vmalloc_addr(ptr))
2167 			vunmap(ptr);
2168 		else
2169 			kunmap(kmap_to_page(ptr));
2170 
2171 		obj->mm.mapping = NULL;
2172 	}
2173 
2174 	__i915_gem_object_reset_page_iter(obj);
2175 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2176 
2177 	return pages;
2178 }
2179 
2180 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2181 				enum i915_mm_subclass subclass)
2182 {
2183 	struct sg_table *pages;
2184 	int ret;
2185 
2186 	if (i915_gem_object_has_pinned_pages(obj))
2187 		return -EBUSY;
2188 
2189 	GEM_BUG_ON(obj->bind_count);
2190 
2191 	/* May be called by shrinker from within get_pages() (on another bo) */
2192 	mutex_lock_nested(&obj->mm.lock, subclass);
2193 	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2194 		ret = -EBUSY;
2195 		goto unlock;
2196 	}
2197 
2198 	/*
2199 	 * ->put_pages might need to allocate memory for the bit17 swizzle
2200 	 * array, hence protect the pages from being reaped by removing them
2201 	 * from the gtt lists early.
2202 	 */
2203 	pages = __i915_gem_object_unset_pages(obj);
2204 
2205 	/*
2206 	 * XXX Temporary hijinx to avoid updating all backends to handle
2207 	 * NULL pages. In the future, when we have more asynchronous
2208 	 * get_pages backends we should be better able to handle the
2209 	 * cancellation of the async task in a more uniform manner.
2210 	 */
2211 	if (!pages && !i915_gem_object_needs_async_cancel(obj))
2212 		pages = ERR_PTR(-EINVAL);
2213 
2214 	if (!IS_ERR(pages))
2215 		obj->ops->put_pages(obj, pages);
2216 
2217 	ret = 0;
2218 unlock:
2219 	mutex_unlock(&obj->mm.lock);
2220 
2221 	return ret;
2222 }
2223 
2224 bool i915_sg_trim(struct sg_table *orig_st)
2225 {
2226 	struct sg_table new_st;
2227 	struct scatterlist *sg, *new_sg;
2228 	unsigned int i;
2229 
2230 	if (orig_st->nents == orig_st->orig_nents)
2231 		return false;
2232 
2233 	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2234 		return false;
2235 
2236 	new_sg = new_st.sgl;
2237 	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2238 		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2239 		sg_dma_address(new_sg) = sg_dma_address(sg);
2240 		sg_dma_len(new_sg) = sg_dma_len(sg);
2241 
2242 		new_sg = sg_next(new_sg);
2243 	}
2244 	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2245 
2246 	sg_free_table(orig_st);
2247 
2248 	*orig_st = new_st;
2249 	return true;
2250 }
2251 
2252 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2253 {
2254 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2255 	const unsigned long page_count = obj->base.size / PAGE_SIZE;
2256 	unsigned long i;
2257 	struct address_space *mapping;
2258 	struct sg_table *st;
2259 	struct scatterlist *sg;
2260 	struct sgt_iter sgt_iter;
2261 	struct page *page;
2262 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2263 	unsigned int max_segment = i915_sg_segment_size();
2264 	unsigned int sg_page_sizes;
2265 	struct pagevec pvec;
2266 	gfp_t noreclaim;
2267 	int ret;
2268 
2269 	/*
2270 	 * Assert that the object is not currently in any GPU domain. As it
2271 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2272 	 * a GPU cache
2273 	 */
2274 	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2275 	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2276 
2277 	/*
2278 	 * If there's no chance of allocating enough pages for the whole
2279 	 * object, bail early.
2280 	 */
2281 	if (page_count > totalram_pages())
2282 		return -ENOMEM;
2283 
2284 	st = kmalloc(sizeof(*st), GFP_KERNEL);
2285 	if (st == NULL)
2286 		return -ENOMEM;
2287 
2288 rebuild_st:
2289 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2290 		kfree(st);
2291 		return -ENOMEM;
2292 	}
2293 
2294 	/*
2295 	 * Get the list of pages out of our struct file.  They'll be pinned
2296 	 * at this point until we release them.
2297 	 *
2298 	 * Fail silently without starting the shrinker
2299 	 */
2300 	mapping = obj->base.filp->f_mapping;
2301 	mapping_set_unevictable(mapping);
2302 	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2303 	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2304 
2305 	sg = st->sgl;
2306 	st->nents = 0;
2307 	sg_page_sizes = 0;
2308 	for (i = 0; i < page_count; i++) {
2309 		const unsigned int shrink[] = {
2310 			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2311 			0,
2312 		}, *s = shrink;
2313 		gfp_t gfp = noreclaim;
2314 
2315 		do {
2316 			cond_resched();
2317 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2318 			if (!IS_ERR(page))
2319 				break;
2320 
2321 			if (!*s) {
2322 				ret = PTR_ERR(page);
2323 				goto err_sg;
2324 			}
2325 
2326 			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2327 
2328 			/*
2329 			 * We've tried hard to allocate the memory by reaping
2330 			 * our own buffer, now let the real VM do its job and
2331 			 * go down in flames if truly OOM.
2332 			 *
2333 			 * However, since graphics tend to be disposable,
2334 			 * defer the oom here by reporting the ENOMEM back
2335 			 * to userspace.
2336 			 */
2337 			if (!*s) {
2338 				/* reclaim and warn, but no oom */
2339 				gfp = mapping_gfp_mask(mapping);
2340 
2341 				/*
2342 				 * Our bo are always dirty and so we require
2343 				 * kswapd to reclaim our pages (direct reclaim
2344 				 * does not effectively begin pageout of our
2345 				 * buffers on its own). However, direct reclaim
2346 				 * only waits for kswapd when under allocation
2347 				 * congestion. So as a result __GFP_RECLAIM is
2348 				 * unreliable and fails to actually reclaim our
2349 				 * dirty pages -- unless you try over and over
2350 				 * again with !__GFP_NORETRY. However, we still
2351 				 * want to fail this allocation rather than
2352 				 * trigger the out-of-memory killer and for
2353 				 * this we want __GFP_RETRY_MAYFAIL.
2354 				 */
2355 				gfp |= __GFP_RETRY_MAYFAIL;
2356 			}
2357 		} while (1);
2358 
2359 		if (!i ||
2360 		    sg->length >= max_segment ||
2361 		    page_to_pfn(page) != last_pfn + 1) {
2362 			if (i) {
2363 				sg_page_sizes |= sg->length;
2364 				sg = sg_next(sg);
2365 			}
2366 			st->nents++;
2367 			sg_set_page(sg, page, PAGE_SIZE, 0);
2368 		} else {
2369 			sg->length += PAGE_SIZE;
2370 		}
2371 		last_pfn = page_to_pfn(page);
2372 
2373 		/* Check that the i965g/gm workaround works. */
2374 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2375 	}
2376 	if (sg) { /* loop terminated early; short sg table */
2377 		sg_page_sizes |= sg->length;
2378 		sg_mark_end(sg);
2379 	}
2380 
2381 	/* Trim unused sg entries to avoid wasting memory. */
2382 	i915_sg_trim(st);
2383 
2384 	ret = i915_gem_gtt_prepare_pages(obj, st);
2385 	if (ret) {
2386 		/*
2387 		 * DMA remapping failed? One possible cause is that
2388 		 * it could not reserve enough large entries, asking
2389 		 * for PAGE_SIZE chunks instead may be helpful.
2390 		 */
2391 		if (max_segment > PAGE_SIZE) {
2392 			for_each_sgt_page(page, sgt_iter, st)
2393 				put_page(page);
2394 			sg_free_table(st);
2395 
2396 			max_segment = PAGE_SIZE;
2397 			goto rebuild_st;
2398 		} else {
2399 			dev_warn(&dev_priv->drm.pdev->dev,
2400 				 "Failed to DMA remap %lu pages\n",
2401 				 page_count);
2402 			goto err_pages;
2403 		}
2404 	}
2405 
2406 	if (i915_gem_object_needs_bit17_swizzle(obj))
2407 		i915_gem_object_do_bit_17_swizzle(obj, st);
2408 
2409 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
2410 
2411 	return 0;
2412 
2413 err_sg:
2414 	sg_mark_end(sg);
2415 err_pages:
2416 	mapping_clear_unevictable(mapping);
2417 	pagevec_init(&pvec);
2418 	for_each_sgt_page(page, sgt_iter, st) {
2419 		if (!pagevec_add(&pvec, page))
2420 			check_release_pagevec(&pvec);
2421 	}
2422 	if (pagevec_count(&pvec))
2423 		check_release_pagevec(&pvec);
2424 	sg_free_table(st);
2425 	kfree(st);
2426 
2427 	/*
2428 	 * shmemfs first checks if there is enough memory to allocate the page
2429 	 * and reports ENOSPC should there be insufficient, along with the usual
2430 	 * ENOMEM for a genuine allocation failure.
2431 	 *
2432 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2433 	 * space and so want to translate the error from shmemfs back to our
2434 	 * usual understanding of ENOMEM.
2435 	 */
2436 	if (ret == -ENOSPC)
2437 		ret = -ENOMEM;
2438 
2439 	return ret;
2440 }
2441 
2442 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2443 				 struct sg_table *pages,
2444 				 unsigned int sg_page_sizes)
2445 {
2446 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
2447 	unsigned long supported = INTEL_INFO(i915)->page_sizes;
2448 	int i;
2449 
2450 	lockdep_assert_held(&obj->mm.lock);
2451 
2452 	/* Make the pages coherent with the GPU (flushing any swapin). */
2453 	if (obj->cache_dirty) {
2454 		obj->write_domain = 0;
2455 		if (i915_gem_object_has_struct_page(obj))
2456 			drm_clflush_sg(pages);
2457 		obj->cache_dirty = false;
2458 	}
2459 
2460 	obj->mm.get_page.sg_pos = pages->sgl;
2461 	obj->mm.get_page.sg_idx = 0;
2462 
2463 	obj->mm.pages = pages;
2464 
2465 	if (i915_gem_object_is_tiled(obj) &&
2466 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2467 		GEM_BUG_ON(obj->mm.quirked);
2468 		__i915_gem_object_pin_pages(obj);
2469 		obj->mm.quirked = true;
2470 	}
2471 
2472 	GEM_BUG_ON(!sg_page_sizes);
2473 	obj->mm.page_sizes.phys = sg_page_sizes;
2474 
2475 	/*
2476 	 * Calculate the supported page-sizes which fit into the given
2477 	 * sg_page_sizes. This will give us the page-sizes which we may be able
2478 	 * to use opportunistically when later inserting into the GTT. For
2479 	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
2480 	 * 64K or 4K pages, although in practice this will depend on a number of
2481 	 * other factors.
2482 	 */
2483 	obj->mm.page_sizes.sg = 0;
2484 	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2485 		if (obj->mm.page_sizes.phys & ~0u << i)
2486 			obj->mm.page_sizes.sg |= BIT(i);
2487 	}
2488 	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2489 
2490 	spin_lock(&i915->mm.obj_lock);
2491 	list_add(&obj->mm.link, &i915->mm.unbound_list);
2492 	spin_unlock(&i915->mm.obj_lock);
2493 }
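
/*
 * Worked example for the page-size computation above (sketch only): if the
 * platform supports 4K, 64K and 2M pages and the sg list mixes 2M and 64K
 * chunks, then page_sizes.phys has the 2M and 64K bits set and the loop
 * yields page_sizes.sg == SZ_2M | SZ_64K | SZ_4K, i.e. every supported
 * page size no larger than the largest chunk present.
 */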
2494 
2495 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2496 {
2497 	int err;
2498 
2499 	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2500 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2501 		return -EFAULT;
2502 	}
2503 
2504 	err = obj->ops->get_pages(obj);
2505 	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2506 
2507 	return err;
2508 }
2509 
2510 /* Ensure that the associated pages are gathered from the backing storage
2511  * and pinned into our object. i915_gem_object_pin_pages() may be called
2512  * multiple times before they are released by a single call to
2513  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2514  * either as a result of memory pressure (reaping pages under the shrinker)
2515  * or as the object is itself released.
2516  */
2517 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2518 {
2519 	int err;
2520 
2521 	err = mutex_lock_interruptible(&obj->mm.lock);
2522 	if (err)
2523 		return err;
2524 
2525 	if (unlikely(!i915_gem_object_has_pages(obj))) {
2526 		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2527 
2528 		err = ____i915_gem_object_get_pages(obj);
2529 		if (err)
2530 			goto unlock;
2531 
2532 		smp_mb__before_atomic();
2533 	}
2534 	atomic_inc(&obj->mm.pages_pin_count);
2535 
2536 unlock:
2537 	mutex_unlock(&obj->mm.lock);
2538 	return err;
2539 }
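
/*
 * Illustrative sketch of the expected pairing (assumed typical caller,
 * not copied from one): every successful pin must be balanced by an
 * unpin once the pages are no longer needed.
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	... use obj->mm.pages ...
 *	i915_gem_object_unpin_pages(obj);
 */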
2540 
2541 /* The 'mapping' part of i915_gem_object_pin_map() below */
2542 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2543 				 enum i915_map_type type)
2544 {
2545 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2546 	struct sg_table *sgt = obj->mm.pages;
2547 	struct sgt_iter sgt_iter;
2548 	struct page *page;
2549 	struct page *stack_pages[32];
2550 	struct page **pages = stack_pages;
2551 	unsigned long i = 0;
2552 	pgprot_t pgprot;
2553 	void *addr;
2554 
2555 	/* A single page can always be kmapped */
2556 	if (n_pages == 1 && type == I915_MAP_WB)
2557 		return kmap(sg_page(sgt->sgl));
2558 
2559 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2560 		/* Too big for stack -- allocate temporary array instead */
2561 		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2562 		if (!pages)
2563 			return NULL;
2564 	}
2565 
2566 	for_each_sgt_page(page, sgt_iter, sgt)
2567 		pages[i++] = page;
2568 
2569 	/* Check that we have the expected number of pages */
2570 	GEM_BUG_ON(i != n_pages);
2571 
2572 	switch (type) {
2573 	default:
2574 		MISSING_CASE(type);
2575 		/* fallthrough to use PAGE_KERNEL anyway */
2576 	case I915_MAP_WB:
2577 		pgprot = PAGE_KERNEL;
2578 		break;
2579 	case I915_MAP_WC:
2580 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2581 		break;
2582 	}
2583 	addr = vmap(pages, n_pages, 0, pgprot);
2584 
2585 	if (pages != stack_pages)
2586 		kvfree(pages);
2587 
2588 	return addr;
2589 }
2590 
2591 /* get, pin, and map the pages of the object into kernel space */
2592 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2593 			      enum i915_map_type type)
2594 {
2595 	enum i915_map_type has_type;
2596 	bool pinned;
2597 	void *ptr;
2598 	int ret;
2599 
2600 	if (unlikely(!i915_gem_object_has_struct_page(obj)))
2601 		return ERR_PTR(-ENXIO);
2602 
2603 	ret = mutex_lock_interruptible(&obj->mm.lock);
2604 	if (ret)
2605 		return ERR_PTR(ret);
2606 
2607 	pinned = !(type & I915_MAP_OVERRIDE);
2608 	type &= ~I915_MAP_OVERRIDE;
2609 
2610 	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2611 		if (unlikely(!i915_gem_object_has_pages(obj))) {
2612 			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2613 
2614 			ret = ____i915_gem_object_get_pages(obj);
2615 			if (ret)
2616 				goto err_unlock;
2617 
2618 			smp_mb__before_atomic();
2619 		}
2620 		atomic_inc(&obj->mm.pages_pin_count);
2621 		pinned = false;
2622 	}
2623 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2624 
2625 	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2626 	if (ptr && has_type != type) {
2627 		if (pinned) {
2628 			ret = -EBUSY;
2629 			goto err_unpin;
2630 		}
2631 
2632 		if (is_vmalloc_addr(ptr))
2633 			vunmap(ptr);
2634 		else
2635 			kunmap(kmap_to_page(ptr));
2636 
2637 		ptr = obj->mm.mapping = NULL;
2638 	}
2639 
2640 	if (!ptr) {
2641 		ptr = i915_gem_object_map(obj, type);
2642 		if (!ptr) {
2643 			ret = -ENOMEM;
2644 			goto err_unpin;
2645 		}
2646 
2647 		obj->mm.mapping = page_pack_bits(ptr, type);
2648 	}
2649 
2650 out_unlock:
2651 	mutex_unlock(&obj->mm.lock);
2652 	return ptr;
2653 
2654 err_unpin:
2655 	atomic_dec(&obj->mm.pages_pin_count);
2656 err_unlock:
2657 	ptr = ERR_PTR(ret);
2658 	goto out_unlock;
2659 }
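
/*
 * Illustrative sketch (assumed typical caller, not copied from one):
 * pin_map returns a kernel virtual address that stays valid while the
 * pages remain pinned.
 *
 *	void *vaddr;
 *
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	memcpy(vaddr, data, len);
 *	i915_gem_object_unpin_map(obj);
 */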
2660 
2661 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2662 				 unsigned long offset,
2663 				 unsigned long size)
2664 {
2665 	enum i915_map_type has_type;
2666 	void *ptr;
2667 
2668 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2669 	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2670 				     offset, size, obj->base.size));
2671 
2672 	obj->mm.dirty = true;
2673 
2674 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2675 		return;
2676 
2677 	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2678 	if (has_type == I915_MAP_WC)
2679 		return;
2680 
2681 	drm_clflush_virt_range(ptr + offset, size);
2682 	if (size == obj->base.size) {
2683 		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2684 		obj->cache_dirty = false;
2685 	}
2686 }
2687 
2688 static int
2689 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2690 			   const struct drm_i915_gem_pwrite *arg)
2691 {
2692 	struct address_space *mapping = obj->base.filp->f_mapping;
2693 	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2694 	u64 remain, offset;
2695 	unsigned int pg;
2696 
2697 	/* Caller already validated user args */
2698 	GEM_BUG_ON(!access_ok(user_data, arg->size));
2699 
2700 	/*
2701 	 * Before we instantiate/pin the backing store for our use, we
2702 	 * can prepopulate the shmemfs filp efficiently using a write into
2703 	 * the pagecache. We avoid the penalty of instantiating all the
2704 	 * pages, important if the user is just writing to a few and never
2705 	 * uses the object on the GPU, and using a direct write into shmemfs
2706 	 * allows it to avoid the cost of retrieving a page (either swapin
2707 	 * or clearing-before-use) before it is overwritten.
2708 	 */
2709 	if (i915_gem_object_has_pages(obj))
2710 		return -ENODEV;
2711 
2712 	if (obj->mm.madv != I915_MADV_WILLNEED)
2713 		return -EFAULT;
2714 
2715 	/*
2716 	 * Before the pages are instantiated the object is treated as being
2717 	 * in the CPU domain. The pages will be clflushed as required before
2718 	 * use, and we can freely write into the pages directly. If userspace
2719 	 * races pwrite with any other operation; corruption will ensue -
2720 	 * that is userspace's prerogative!
2721 	 */
2722 
2723 	remain = arg->size;
2724 	offset = arg->offset;
2725 	pg = offset_in_page(offset);
2726 
2727 	do {
2728 		unsigned int len, unwritten;
2729 		struct page *page;
2730 		void *data, *vaddr;
2731 		int err;
2732 		char c;
2733 
2734 		len = PAGE_SIZE - pg;
2735 		if (len > remain)
2736 			len = remain;
2737 
2738 		/* Prefault the user page to reduce potential recursion */
2739 		err = __get_user(c, user_data);
2740 		if (err)
2741 			return err;
2742 
2743 		err = __get_user(c, user_data + len - 1);
2744 		if (err)
2745 			return err;
2746 
2747 		err = pagecache_write_begin(obj->base.filp, mapping,
2748 					    offset, len, 0,
2749 					    &page, &data);
2750 		if (err < 0)
2751 			return err;
2752 
2753 		vaddr = kmap_atomic(page);
2754 		unwritten = __copy_from_user_inatomic(vaddr + pg,
2755 						      user_data,
2756 						      len);
2757 		kunmap_atomic(vaddr);
2758 
2759 		err = pagecache_write_end(obj->base.filp, mapping,
2760 					  offset, len, len - unwritten,
2761 					  page, data);
2762 		if (err < 0)
2763 			return err;
2764 
2765 		/* We don't handle -EFAULT, leave it to the caller to check */
2766 		if (unwritten)
2767 			return -ENODEV;
2768 
2769 		remain -= len;
2770 		user_data += len;
2771 		offset += len;
2772 		pg = 0;
2773 	} while (remain);
2774 
2775 	return 0;
2776 }
2777 
2778 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2779 {
2780 	struct drm_i915_private *i915 = to_i915(gem->dev);
2781 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
2782 	struct drm_i915_file_private *fpriv = file->driver_priv;
2783 	struct i915_lut_handle *lut, *ln;
2784 
2785 	mutex_lock(&i915->drm.struct_mutex);
2786 
2787 	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2788 		struct i915_gem_context *ctx = lut->ctx;
2789 		struct i915_vma *vma;
2790 
2791 		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2792 		if (ctx->file_priv != fpriv)
2793 			continue;
2794 
2795 		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2796 		GEM_BUG_ON(vma->obj != obj);
2797 
2798 		/* We allow the process to have multiple handles to the same
2799 		 * vma, in the same fd namespace, by virtue of flink/open.
2800 		 */
2801 		GEM_BUG_ON(!vma->open_count);
2802 		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
2803 			i915_vma_close(vma);
2804 
2805 		list_del(&lut->obj_link);
2806 		list_del(&lut->ctx_link);
2807 
2808 		i915_lut_handle_free(lut);
2809 		__i915_gem_object_release_unless_active(obj);
2810 	}
2811 
2812 	mutex_unlock(&i915->drm.struct_mutex);
2813 }
2814 
2815 static unsigned long to_wait_timeout(s64 timeout_ns)
2816 {
2817 	if (timeout_ns < 0)
2818 		return MAX_SCHEDULE_TIMEOUT;
2819 
2820 	if (timeout_ns == 0)
2821 		return 0;
2822 
2823 	return nsecs_to_jiffies_timeout(timeout_ns);
2824 }
2825 
2826 /**
2827  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2828  * @dev: drm device pointer
2829  * @data: ioctl data blob
2830  * @file: drm file pointer
2831  *
2832  * Returns 0 if successful, else an error is returned with the remaining time in
2833  * the timeout parameter.
2834  *  -ETIME: object is still busy after timeout
2835  *  -ERESTARTSYS: signal interrupted the wait
2836  *  -ENOENT: object doesn't exist
2837  * Also possible, but rare:
2838  *  -EAGAIN: incomplete, restart syscall
2839  *  -ENOMEM: damn
2840  *  -ENODEV: Internal IRQ fail
2841  *  -E?: The add request failed
2842  *
2843  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2844  * non-zero timeout parameter the wait ioctl will wait for the given number of
2845  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2846  * without holding struct_mutex the object may become re-busied before this
2847  * function completes. A similar but shorter * race condition exists in the busy
2848  * ioctl
2849  */
2850 int
2851 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2852 {
2853 	struct drm_i915_gem_wait *args = data;
2854 	struct drm_i915_gem_object *obj;
2855 	ktime_t start;
2856 	long ret;
2857 
2858 	if (args->flags != 0)
2859 		return -EINVAL;
2860 
2861 	obj = i915_gem_object_lookup(file, args->bo_handle);
2862 	if (!obj)
2863 		return -ENOENT;
2864 
2865 	start = ktime_get();
2866 
2867 	ret = i915_gem_object_wait(obj,
2868 				   I915_WAIT_INTERRUPTIBLE |
2869 				   I915_WAIT_PRIORITY |
2870 				   I915_WAIT_ALL,
2871 				   to_wait_timeout(args->timeout_ns));
2872 
2873 	if (args->timeout_ns > 0) {
2874 		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2875 		if (args->timeout_ns < 0)
2876 			args->timeout_ns = 0;
2877 
2878 		/*
2879 		 * Apparently ktime isn't accurate enough and occasionally has a
2880 		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
2881 		 * things up to make the test happy. We allow up to 1 jiffy.
2882 		 *
2883 		 * This is a regression from the timespec->ktime conversion.
2884 		 */
2885 		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
2886 			args->timeout_ns = 0;
2887 
2888 		/* Asked to wait beyond the jiffie/scheduler precision? */
2889 		if (ret == -ETIME && args->timeout_ns)
2890 			ret = -EAGAIN;
2891 	}
2892 
2893 	i915_gem_object_put(obj);
2894 	return ret;
2895 }
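
/*
 * Illustrative userspace sketch (assumed typical usage, not copied from a
 * real client): wait up to one second for the object to go idle; a
 * timeout_ns of 0 turns this into a non-blocking busy query as noted above.
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000 * 1000 * 1000,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
 *		... errno == ETIME means the object is still busy ...
 */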
2896 
2897 static int wait_for_engines(struct drm_i915_private *i915)
2898 {
2899 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
2900 		dev_err(i915->drm.dev,
2901 			"Failed to idle engines, declaring wedged!\n");
2902 		GEM_TRACE_DUMP();
2903 		i915_gem_set_wedged(i915);
2904 		return -EIO;
2905 	}
2906 
2907 	return 0;
2908 }
2909 
2910 static long
2911 wait_for_timelines(struct drm_i915_private *i915,
2912 		   unsigned int flags, long timeout)
2913 {
2914 	struct i915_gt_timelines *gt = &i915->gt.timelines;
2915 	struct i915_timeline *tl;
2916 
2917 	mutex_lock(&gt->mutex);
2918 	list_for_each_entry(tl, &gt->active_list, link) {
2919 		struct i915_request *rq;
2920 
2921 		rq = i915_active_request_get_unlocked(&tl->last_request);
2922 		if (!rq)
2923 			continue;
2924 
2925 		mutex_unlock(&gt->mutex);
2926 
2927 		/*
2928 		 * "Race-to-idle".
2929 		 *
2930 		 * Switching to the kernel context is often used as a synchronous
2931 		 * step prior to idling, e.g. in suspend for flushing all
2932 		 * current operations to memory before sleeping. These we
2933 		 * want to complete as quickly as possible to avoid prolonged
2934 		 * stalls, so allow the gpu to boost to maximum clocks.
2935 		 */
2936 		if (flags & I915_WAIT_FOR_IDLE_BOOST)
2937 			gen6_rps_boost(rq);
2938 
2939 		timeout = i915_request_wait(rq, flags, timeout);
2940 		i915_request_put(rq);
2941 		if (timeout < 0)
2942 			return timeout;
2943 
2944 		/* restart after reacquiring the lock */
2945 		mutex_lock(&gt->mutex);
2946 		tl = list_entry(&gt->active_list, typeof(*tl), link);
2947 	}
2948 	mutex_unlock(&gt->mutex);
2949 
2950 	return timeout;
2951 }
2952 
2953 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
2954 			   unsigned int flags, long timeout)
2955 {
2956 	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
2957 		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
2958 		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
2959 		  yesno(i915->gt.awake));
2960 
2961 	/* If the device is asleep, we have no requests outstanding */
2962 	if (!READ_ONCE(i915->gt.awake))
2963 		return 0;
2964 
2965 	timeout = wait_for_timelines(i915, flags, timeout);
2966 	if (timeout < 0)
2967 		return timeout;
2968 
2969 	if (flags & I915_WAIT_LOCKED) {
2970 		int err;
2971 
2972 		lockdep_assert_held(&i915->drm.struct_mutex);
2973 
2974 		err = wait_for_engines(i915);
2975 		if (err)
2976 			return err;
2977 
2978 		i915_retire_requests(i915);
2979 	}
2980 
2981 	return 0;
2982 }
2983 
2984 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
2985 {
2986 	/*
2987 	 * We manually flush the CPU domain so that we can override and
2988 	 * force the flush for the display, and perform it asynchronously.
2989 	 */
2990 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
2991 	if (obj->cache_dirty)
2992 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
2993 	obj->write_domain = 0;
2994 }
2995 
2996 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
2997 {
2998 	if (!READ_ONCE(obj->pin_global))
2999 		return;
3000 
3001 	mutex_lock(&obj->base.dev->struct_mutex);
3002 	__i915_gem_object_flush_for_display(obj);
3003 	mutex_unlock(&obj->base.dev->struct_mutex);
3004 }
3005 
3006 /**
3007  * Moves a single object to the WC read, and possibly write domain.
3008  * @obj: object to act on
3009  * @write: ask for write access or read only
3010  *
3011  * This function returns when the move is complete, including waiting on
3012  * flushes to occur.
3013  */
3014 int
3015 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3016 {
3017 	int ret;
3018 
3019 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3020 
3021 	ret = i915_gem_object_wait(obj,
3022 				   I915_WAIT_INTERRUPTIBLE |
3023 				   I915_WAIT_LOCKED |
3024 				   (write ? I915_WAIT_ALL : 0),
3025 				   MAX_SCHEDULE_TIMEOUT);
3026 	if (ret)
3027 		return ret;
3028 
3029 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
3030 		return 0;
3031 
3032 	/* Flush and acquire obj->pages so that we are coherent through
3033 	 * direct access in memory with previous cached writes through
3034 	 * shmemfs and that our cache domain tracking remains valid.
3035 	 * For example, if the obj->filp was moved to swap without us
3036 	 * being notified and releasing the pages, we would mistakenly
3037 	 * continue to assume that the obj remained out of the CPU cached
3038 	 * domain.
3039 	 */
3040 	ret = i915_gem_object_pin_pages(obj);
3041 	if (ret)
3042 		return ret;
3043 
3044 	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3045 
3046 	/* Serialise direct access to this object with the barriers for
3047 	 * coherent writes from the GPU, by effectively invalidating the
3048 	 * WC domain upon first access.
3049 	 */
3050 	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3051 		mb();
3052 
3053 	/* It should now be out of any other write domains, and we can update
3054 	 * the domain values for our changes.
3055 	 */
3056 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3057 	obj->read_domains |= I915_GEM_DOMAIN_WC;
3058 	if (write) {
3059 		obj->read_domains = I915_GEM_DOMAIN_WC;
3060 		obj->write_domain = I915_GEM_DOMAIN_WC;
3061 		obj->mm.dirty = true;
3062 	}
3063 
3064 	i915_gem_object_unpin_pages(obj);
3065 	return 0;
3066 }
3067 
3068 /**
3069  * Moves a single object to the GTT read, and possibly write domain.
3070  * @obj: object to act on
3071  * @write: ask for write access or read only
3072  *
3073  * This function returns when the move is complete, including waiting on
3074  * flushes to occur.
3075  */
3076 int
3077 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3078 {
3079 	int ret;
3080 
3081 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3082 
3083 	ret = i915_gem_object_wait(obj,
3084 				   I915_WAIT_INTERRUPTIBLE |
3085 				   I915_WAIT_LOCKED |
3086 				   (write ? I915_WAIT_ALL : 0),
3087 				   MAX_SCHEDULE_TIMEOUT);
3088 	if (ret)
3089 		return ret;
3090 
3091 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3092 		return 0;
3093 
3094 	/* Flush and acquire obj->pages so that we are coherent through
3095 	 * direct access in memory with previous cached writes through
3096 	 * shmemfs and that our cache domain tracking remains valid.
3097 	 * For example, if the obj->filp was moved to swap without us
3098 	 * being notified and releasing the pages, we would mistakenly
3099 	 * continue to assume that the obj remained out of the CPU cached
3100 	 * domain.
3101 	 */
3102 	ret = i915_gem_object_pin_pages(obj);
3103 	if (ret)
3104 		return ret;
3105 
3106 	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3107 
3108 	/* Serialise direct access to this object with the barriers for
3109 	 * coherent writes from the GPU, by effectively invalidating the
3110 	 * GTT domain upon first access.
3111 	 */
3112 	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3113 		mb();
3114 
3115 	/* It should now be out of any other write domains, and we can update
3116 	 * the domain values for our changes.
3117 	 */
3118 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3119 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
3120 	if (write) {
3121 		obj->read_domains = I915_GEM_DOMAIN_GTT;
3122 		obj->write_domain = I915_GEM_DOMAIN_GTT;
3123 		obj->mm.dirty = true;
3124 	}
3125 
3126 	i915_gem_object_unpin_pages(obj);
3127 	return 0;
3128 }
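
/*
 * Illustrative kernel-side sketch (assumed typical usage, not copied from
 * a caller): moving an object into the GTT domain before a CPU write
 * through the aperture, under struct_mutex as asserted above.
 *
 *	mutex_lock(&i915->drm.struct_mutex);
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	mutex_unlock(&i915->drm.struct_mutex);
 */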
3129 
3130 /**
3131  * Changes the cache-level of an object across all VMA.
3132  * @obj: object to act on
3133  * @cache_level: new cache level to set for the object
3134  *
3135  * After this function returns, the object will be in the new cache-level
3136  * across all GTT and the contents of the backing storage will be coherent,
3137  * with respect to the new cache-level. In order to keep the backing storage
3138  * coherent for all users, we only allow a single cache level to be set
3139  * globally on the object and prevent it from being changed whilst the
3140  * hardware is reading from the object. That is, if the object is currently
3141  * on the scanout it will be set to uncached (or equivalent display
3142  * cache coherency) and all non-MOCS GPU access will also be uncached so
3143  * that all direct access to the scanout remains coherent.
3144  */
3145 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3146 				    enum i915_cache_level cache_level)
3147 {
3148 	struct i915_vma *vma;
3149 	int ret;
3150 
3151 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3152 
3153 	if (obj->cache_level == cache_level)
3154 		return 0;
3155 
3156 	/* Inspect the list of currently bound VMA and unbind any that would
3157 	 * be invalid given the new cache-level. This is principally to
3158 	 * catch the issue of the CS prefetch crossing page boundaries and
3159 	 * reading an invalid PTE on older architectures.
3160 	 */
3161 restart:
3162 	list_for_each_entry(vma, &obj->vma.list, obj_link) {
3163 		if (!drm_mm_node_allocated(&vma->node))
3164 			continue;
3165 
3166 		if (i915_vma_is_pinned(vma)) {
3167 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3168 			return -EBUSY;
3169 		}
3170 
3171 		if (!i915_vma_is_closed(vma) &&
3172 		    i915_gem_valid_gtt_space(vma, cache_level))
3173 			continue;
3174 
3175 		ret = i915_vma_unbind(vma);
3176 		if (ret)
3177 			return ret;
3178 
3179 		/* As unbinding may affect other elements in the
3180 		 * obj->vma_list (due to side-effects from retiring
3181 		 * an active vma), play safe and restart the iterator.
3182 		 */
3183 		goto restart;
3184 	}
3185 
3186 	/* We can reuse the existing drm_mm nodes but need to change the
3187 	 * cache-level on the PTE. We could simply unbind them all and
3188 	 * rebind with the correct cache-level on next use. However since
3189 	 * we already have a valid slot, dma mapping, pages etc, we may as
3190 	 * rewrite the PTE in the belief that doing so tramples upon less
3191 	 * state and so involves less work.
3192 	 */
3193 	if (obj->bind_count) {
3194 		/* Before we change the PTE, the GPU must not be accessing it.
3195 		 * If we wait upon the object, we know that all the bound
3196 		 * VMA are no longer active.
3197 		 */
3198 		ret = i915_gem_object_wait(obj,
3199 					   I915_WAIT_INTERRUPTIBLE |
3200 					   I915_WAIT_LOCKED |
3201 					   I915_WAIT_ALL,
3202 					   MAX_SCHEDULE_TIMEOUT);
3203 		if (ret)
3204 			return ret;
3205 
3206 		if (!HAS_LLC(to_i915(obj->base.dev)) &&
3207 		    cache_level != I915_CACHE_NONE) {
3208 			/* Access to snoopable pages through the GTT is
3209 			 * incoherent and on some machines causes a hard
3210 			 * lockup. Relinquish the CPU mmapping to force
3211 			 * userspace to refault in the pages and we can
3212 			 * then double check if the GTT mapping is still
3213 			 * valid for that pointer access.
3214 			 */
3215 			i915_gem_release_mmap(obj);
3216 
3217 			/* As we no longer need a fence for GTT access,
3218 			 * we can relinquish it now (and so prevent having
3219 			 * to steal a fence from someone else on the next
3220 			 * fence request). Note GPU activity would have
3221 			 * dropped the fence as all snoopable access is
3222 			 * supposed to be linear.
3223 			 */
3224 			for_each_ggtt_vma(vma, obj) {
3225 				ret = i915_vma_put_fence(vma);
3226 				if (ret)
3227 					return ret;
3228 			}
3229 		} else {
3230 			/* We either have incoherent backing store and
3231 			 * so no GTT access or the architecture is fully
3232 			 * coherent. In such cases, existing GTT mmaps
3233 			 * ignore the cache bit in the PTE and we can
3234 			 * rewrite it without confusing the GPU or having
3235 			 * to force userspace to fault back in its mmaps.
3236 			 */
3237 		}
3238 
3239 		list_for_each_entry(vma, &obj->vma.list, obj_link) {
3240 			if (!drm_mm_node_allocated(&vma->node))
3241 				continue;
3242 
3243 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3244 			if (ret)
3245 				return ret;
3246 		}
3247 	}
3248 
3249 	list_for_each_entry(vma, &obj->vma.list, obj_link)
3250 		vma->node.color = cache_level;
3251 	i915_gem_object_set_cache_coherency(obj, cache_level);
3252 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
3253 
3254 	return 0;
3255 }
3256 
3257 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3258 			       struct drm_file *file)
3259 {
3260 	struct drm_i915_gem_caching *args = data;
3261 	struct drm_i915_gem_object *obj;
3262 	int err = 0;
3263 
3264 	rcu_read_lock();
3265 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3266 	if (!obj) {
3267 		err = -ENOENT;
3268 		goto out;
3269 	}
3270 
3271 	switch (obj->cache_level) {
3272 	case I915_CACHE_LLC:
3273 	case I915_CACHE_L3_LLC:
3274 		args->caching = I915_CACHING_CACHED;
3275 		break;
3276 
3277 	case I915_CACHE_WT:
3278 		args->caching = I915_CACHING_DISPLAY;
3279 		break;
3280 
3281 	default:
3282 		args->caching = I915_CACHING_NONE;
3283 		break;
3284 	}
3285 out:
3286 	rcu_read_unlock();
3287 	return err;
3288 }
3289 
3290 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3291 			       struct drm_file *file)
3292 {
3293 	struct drm_i915_private *i915 = to_i915(dev);
3294 	struct drm_i915_gem_caching *args = data;
3295 	struct drm_i915_gem_object *obj;
3296 	enum i915_cache_level level;
3297 	int ret = 0;
3298 
3299 	switch (args->caching) {
3300 	case I915_CACHING_NONE:
3301 		level = I915_CACHE_NONE;
3302 		break;
3303 	case I915_CACHING_CACHED:
3304 		/*
3305 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3306 		 * snooped mapping may leave stale data in a corresponding CPU
3307 		 * cacheline, whereas normally such cachelines would get
3308 		 * invalidated.
3309 		 */
3310 		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3311 			return -ENODEV;
3312 
3313 		level = I915_CACHE_LLC;
3314 		break;
3315 	case I915_CACHING_DISPLAY:
3316 		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3317 		break;
3318 	default:
3319 		return -EINVAL;
3320 	}
3321 
3322 	obj = i915_gem_object_lookup(file, args->handle);
3323 	if (!obj)
3324 		return -ENOENT;
3325 
3326 	/*
3327 	 * The caching mode of proxy object is handled by its generator, and
3328 	 * not allowed to be changed by userspace.
3329 	 */
3330 	if (i915_gem_object_is_proxy(obj)) {
3331 		ret = -ENXIO;
3332 		goto out;
3333 	}
3334 
3335 	if (obj->cache_level == level)
3336 		goto out;
3337 
3338 	ret = i915_gem_object_wait(obj,
3339 				   I915_WAIT_INTERRUPTIBLE,
3340 				   MAX_SCHEDULE_TIMEOUT);
3341 	if (ret)
3342 		goto out;
3343 
3344 	ret = i915_mutex_lock_interruptible(dev);
3345 	if (ret)
3346 		goto out;
3347 
3348 	ret = i915_gem_object_set_cache_level(obj, level);
3349 	mutex_unlock(&dev->struct_mutex);
3350 
3351 out:
3352 	i915_gem_object_put(obj);
3353 	return ret;
3354 }
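
/*
 * Illustrative userspace sketch (assumed typical usage, not copied from a
 * real client): request LLC caching for an object; platforms without LLC
 * or snooping report -ENODEV as handled above.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */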
3355 
3356 /*
3357  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3358  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3359  * (for pageflips). We only flush the caches while preparing the buffer for
3360  * display, the callers are responsible for frontbuffer flush.
3361  */
3362 struct i915_vma *
3363 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3364 				     u32 alignment,
3365 				     const struct i915_ggtt_view *view,
3366 				     unsigned int flags)
3367 {
3368 	struct i915_vma *vma;
3369 	int ret;
3370 
3371 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3372 
3373 	/* Mark the global pin early so that we account for the
3374 	 * display coherency whilst setting up the cache domains.
3375 	 */
3376 	obj->pin_global++;
3377 
3378 	/* The display engine is not coherent with the LLC cache on gen6.  As
3379 	 * a result, we make sure that the pinning that is about to occur is
3380 	 * done with uncached PTEs. This is the lowest common denominator for all
3381 	 * chipsets.
3382 	 *
3383 	 * However for gen6+, we could do better by using the GFDT bit instead
3384 	 * of uncaching, which would allow us to flush all the LLC-cached data
3385 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3386 	 */
3387 	ret = i915_gem_object_set_cache_level(obj,
3388 					      HAS_WT(to_i915(obj->base.dev)) ?
3389 					      I915_CACHE_WT : I915_CACHE_NONE);
3390 	if (ret) {
3391 		vma = ERR_PTR(ret);
3392 		goto err_unpin_global;
3393 	}
3394 
3395 	/* As the user may map the buffer once pinned in the display plane
3396 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3397 	 * always use map_and_fenceable for all scanout buffers. However,
3398 	 * it may simply be too big to fit into mappable, in which case
3399 	 * put it anyway and hope that userspace can cope (but always first
3400 	 * try to preserve the existing ABI).
3401 	 */
3402 	vma = ERR_PTR(-ENOSPC);
3403 	if ((flags & PIN_MAPPABLE) == 0 &&
3404 	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
3405 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3406 					       flags |
3407 					       PIN_MAPPABLE |
3408 					       PIN_NONBLOCK);
3409 	if (IS_ERR(vma))
3410 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3411 	if (IS_ERR(vma))
3412 		goto err_unpin_global;
3413 
3414 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3415 
3416 	__i915_gem_object_flush_for_display(obj);
3417 
3418 	/* It should now be out of any other write domains, and we can update
3419 	 * the domain values for our changes.
3420 	 */
3421 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
3422 
3423 	return vma;
3424 
3425 err_unpin_global:
3426 	obj->pin_global--;
3427 	return vma;
3428 }
3429 
3430 void
3431 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3432 {
3433 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3434 
3435 	if (WARN_ON(vma->obj->pin_global == 0))
3436 		return;
3437 
3438 	if (--vma->obj->pin_global == 0)
3439 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3440 
3441 	/* Bump the LRU to try and avoid premature eviction whilst flipping */
3442 	i915_gem_object_bump_inactive_ggtt(vma->obj);
3443 
3444 	i915_vma_unpin(vma);
3445 }
3446 
3447 /**
3448  * Moves a single object to the CPU read, and possibly write domain.
3449  * @obj: object to act on
3450  * @write: requesting write or read-only access
3451  *
3452  * This function returns when the move is complete, including waiting on
3453  * flushes to occur.
3454  */
3455 int
3456 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3457 {
3458 	int ret;
3459 
3460 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3461 
3462 	ret = i915_gem_object_wait(obj,
3463 				   I915_WAIT_INTERRUPTIBLE |
3464 				   I915_WAIT_LOCKED |
3465 				   (write ? I915_WAIT_ALL : 0),
3466 				   MAX_SCHEDULE_TIMEOUT);
3467 	if (ret)
3468 		return ret;
3469 
3470 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3471 
3472 	/* Flush the CPU cache if it's still invalid. */
3473 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3474 		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3475 		obj->read_domains |= I915_GEM_DOMAIN_CPU;
3476 	}
3477 
3478 	/* It should now be out of any other write domains, and we can update
3479 	 * the domain values for our changes.
3480 	 */
3481 	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3482 
3483 	/* If we're writing through the CPU, then the GPU read domains will
3484 	 * need to be invalidated at next use.
3485 	 */
3486 	if (write)
3487 		__start_cpu_write(obj);
3488 
3489 	return 0;
3490 }
3491 
3492 /* Throttle our rendering by waiting until the ring has completed our requests
3493  * emitted over 20 msec ago.
3494  *
3495  * Note that if we were to use the current jiffies each time around the loop,
3496  * we wouldn't escape the function with any frames outstanding if the time to
3497  * render a frame was over 20ms.
3498  *
3499  * This should get us reasonable parallelism between CPU and GPU but also
3500  * relatively low latency when blocking on a particular request to finish.
3501  */
3502 static int
3503 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3504 {
3505 	struct drm_i915_private *dev_priv = to_i915(dev);
3506 	struct drm_i915_file_private *file_priv = file->driver_priv;
3507 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3508 	struct i915_request *request, *target = NULL;
3509 	long ret;
3510 
3511 	/* ABI: return -EIO if already wedged */
3512 	ret = i915_terminally_wedged(dev_priv);
3513 	if (ret)
3514 		return ret;
3515 
3516 	spin_lock(&file_priv->mm.lock);
3517 	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3518 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3519 			break;
3520 
3521 		if (target) {
3522 			list_del(&target->client_link);
3523 			target->file_priv = NULL;
3524 		}
3525 
3526 		target = request;
3527 	}
3528 	if (target)
3529 		i915_request_get(target);
3530 	spin_unlock(&file_priv->mm.lock);
3531 
3532 	if (target == NULL)
3533 		return 0;
3534 
3535 	ret = i915_request_wait(target,
3536 				I915_WAIT_INTERRUPTIBLE,
3537 				MAX_SCHEDULE_TIMEOUT);
3538 	i915_request_put(target);
3539 
3540 	return ret < 0 ? ret : 0;
3541 }
3542 
3543 struct i915_vma *
3544 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3545 			 const struct i915_ggtt_view *view,
3546 			 u64 size,
3547 			 u64 alignment,
3548 			 u64 flags)
3549 {
3550 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3551 	struct i915_address_space *vm = &dev_priv->ggtt.vm;
3552 	struct i915_vma *vma;
3553 	int ret;
3554 
3555 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3556 
3557 	if (flags & PIN_MAPPABLE &&
3558 	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3559 		/* If the required space is larger than the available
3560 		 * aperture, we will not be able to find a slot for the
3561 		 * object and unbinding the object now will be in
3562 		 * vain. Worse, doing so may cause us to ping-pong
3563 		 * the object in and out of the Global GTT and
3564 		 * waste a lot of cycles under the mutex.
3565 		 */
3566 		if (obj->base.size > dev_priv->ggtt.mappable_end)
3567 			return ERR_PTR(-E2BIG);
3568 
3569 		/* If NONBLOCK is set the caller is optimistically
3570 		 * trying to cache the full object within the mappable
3571 		 * aperture, and *must* have a fallback in place for
3572 		 * situations where we cannot bind the object. We
3573 		 * can be a little more lax here and use the fallback
3574 		 * more often to avoid costly migrations of ourselves
3575 		 * and other objects within the aperture.
3576 		 *
3577 		 * Half-the-aperture is used as a simple heuristic.
3578 		 * More interesting would to do search for a free
3579 		 * block prior to making the commitment to unbind.
3580 		 * That caters for the self-harm case, and with a
3581 		 * little more heuristics (e.g. NOFAULT, NOEVICT)
3582 		 * we could try to minimise harm to others.
3583 		 */
3584 		if (flags & PIN_NONBLOCK &&
3585 		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
3586 			return ERR_PTR(-ENOSPC);
3587 	}
3588 
3589 	vma = i915_vma_instance(obj, vm, view);
3590 	if (IS_ERR(vma))
3591 		return vma;
3592 
3593 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
3594 		if (flags & PIN_NONBLOCK) {
3595 			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3596 				return ERR_PTR(-ENOSPC);
3597 
3598 			if (flags & PIN_MAPPABLE &&
3599 			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3600 				return ERR_PTR(-ENOSPC);
3601 		}
3602 
3603 		WARN(i915_vma_is_pinned(vma),
3604 		     "bo is already pinned in ggtt with incorrect alignment:"
3605 		     " offset=%08x, req.alignment=%llx,"
3606 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3607 		     i915_ggtt_offset(vma), alignment,
3608 		     !!(flags & PIN_MAPPABLE),
3609 		     i915_vma_is_map_and_fenceable(vma));
3610 		ret = i915_vma_unbind(vma);
3611 		if (ret)
3612 			return ERR_PTR(ret);
3613 	}
3614 
3615 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3616 	if (ret)
3617 		return ERR_PTR(ret);
3618 
3619 	return vma;
3620 }
3621 
3622 static __always_inline u32 __busy_read_flag(u8 id)
3623 {
3624 	if (id == (u8)I915_ENGINE_CLASS_INVALID)
3625 		return 0xffff0000u;
3626 
3627 	GEM_BUG_ON(id >= 16);
3628 	return 0x10000u << id;
3629 }
3630 
3631 static __always_inline u32 __busy_write_id(u8 id)
3632 {
3633 	/*
3634 	 * The uABI guarantees an active writer is also amongst the read
3635 	 * engines. This would be true if we accessed the activity tracking
3636 	 * under the lock, but as we perform the lookup of the object and
3637 	 * its activity locklessly we can not guarantee that the last_write
3638 	 * being active implies that we have set the same engine flag from
3639 	 * last_read - hence we always set both read and write busy for
3640 	 * last_write.
3641 	 */
3642 	if (id == (u8)I915_ENGINE_CLASS_INVALID)
3643 		return 0xffffffffu;
3644 
3645 	return (id + 1) | __busy_read_flag(id);
3646 }
3647 
3648 static __always_inline unsigned int
3649 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
3650 {
3651 	const struct i915_request *rq;
3652 
3653 	/*
3654 	 * We have to check the current hw status of the fence as the uABI
3655 	 * guarantees forward progress. We could rely on the idle worker
3656 	 * to eventually flush us, but to minimise latency just ask the
3657 	 * hardware.
3658 	 *
3659 	 * Note we only report on the status of native fences.
3660 	 */
3661 	if (!dma_fence_is_i915(fence))
3662 		return 0;
3663 
3664 	/* opencode to_request() in order to avoid const warnings */
3665 	rq = container_of(fence, const struct i915_request, fence);
3666 	if (i915_request_completed(rq))
3667 		return 0;
3668 
3669 	/* Beware type-expansion follies! */
3670 	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
3671 	return flag(rq->engine->uabi_class);
3672 }
3673 
3674 static __always_inline unsigned int
3675 busy_check_reader(const struct dma_fence *fence)
3676 {
3677 	return __busy_set_if_active(fence, __busy_read_flag);
3678 }
3679 
3680 static __always_inline unsigned int
3681 busy_check_writer(const struct dma_fence *fence)
3682 {
3683 	if (!fence)
3684 		return 0;
3685 
3686 	return __busy_set_if_active(fence, __busy_write_id);
3687 }
3688 
3689 int
3690 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3691 		    struct drm_file *file)
3692 {
3693 	struct drm_i915_gem_busy *args = data;
3694 	struct drm_i915_gem_object *obj;
3695 	struct reservation_object_list *list;
3696 	unsigned int seq;
3697 	int err;
3698 
3699 	err = -ENOENT;
3700 	rcu_read_lock();
3701 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3702 	if (!obj)
3703 		goto out;
3704 
3705 	/*
3706 	 * A discrepancy here is that we do not report the status of
3707 	 * non-i915 fences, i.e. even though we may report the object as idle,
3708 	 * a call to set-domain may still stall waiting for foreign rendering.
3709 	 * This also means that wait-ioctl may report an object as busy,
3710 	 * where busy-ioctl considers it idle.
3711 	 *
3712 	 * We trade the ability to warn of foreign fences for the ability to
3713 	 * report which i915 engines are active for the object.
3714 	 *
3715 	 * Alternatively, we can trade that extra information on read/write
3716 	 * activity with
3717 	 *	args->busy =
3718 	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
3719 	 * to report the overall busyness. This is what the wait-ioctl does.
3720 	 *
3721 	 */
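	/*
	 * Lockless snapshot: sample the reservation object's seqcount, read
	 * the exclusive and shared fences under RCU, and retry if the fences
	 * were replaced while we were reporting the object as busy.
	 */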
3722 retry:
3723 	seq = raw_read_seqcount(&obj->resv->seq);
3724 
3725 	/* Translate the exclusive fence to the READ *and* WRITE engine */
3726 	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3727 
3728 	/* Translate shared fences to READ set of engines */
3729 	list = rcu_dereference(obj->resv->fence);
3730 	if (list) {
3731 		unsigned int shared_count = list->shared_count, i;
3732 
3733 		for (i = 0; i < shared_count; ++i) {
3734 			struct dma_fence *fence =
3735 				rcu_dereference(list->shared[i]);
3736 
3737 			args->busy |= busy_check_reader(fence);
3738 		}
3739 	}
3740 
3741 	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3742 		goto retry;
3743 
3744 	err = 0;
3745 out:
3746 	rcu_read_unlock();
3747 	return err;
3748 }
3749 
3750 int
3751 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3752 			struct drm_file *file_priv)
3753 {
3754 	return i915_gem_ring_throttle(dev, file_priv);
3755 }
3756 
3757 int
3758 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3759 		       struct drm_file *file_priv)
3760 {
3761 	struct drm_i915_private *dev_priv = to_i915(dev);
3762 	struct drm_i915_gem_madvise *args = data;
3763 	struct drm_i915_gem_object *obj;
3764 	int err;
3765 
3766 	switch (args->madv) {
3767 	case I915_MADV_DONTNEED:
3768 	case I915_MADV_WILLNEED:
3769 	    break;
3770 	default:
3771 	    return -EINVAL;
3772 	}
3773 
3774 	obj = i915_gem_object_lookup(file_priv, args->handle);
3775 	if (!obj)
3776 		return -ENOENT;
3777 
3778 	err = mutex_lock_interruptible(&obj->mm.lock);
3779 	if (err)
3780 		goto out;
3781 
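	/*
	 * On platforms with the pin-swizzled-pages quirk, tiled objects keep
	 * their pages pinned while marked WILLNEED; move that pin to match
	 * the madvise state being requested.
	 */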
3782 	if (i915_gem_object_has_pages(obj) &&
3783 	    i915_gem_object_is_tiled(obj) &&
3784 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3785 		if (obj->mm.madv == I915_MADV_WILLNEED) {
3786 			GEM_BUG_ON(!obj->mm.quirked);
3787 			__i915_gem_object_unpin_pages(obj);
3788 			obj->mm.quirked = false;
3789 		}
3790 		if (args->madv == I915_MADV_WILLNEED) {
3791 			GEM_BUG_ON(obj->mm.quirked);
3792 			__i915_gem_object_pin_pages(obj);
3793 			obj->mm.quirked = true;
3794 		}
3795 	}
3796 
3797 	if (obj->mm.madv != __I915_MADV_PURGED)
3798 		obj->mm.madv = args->madv;
3799 
3800 	/* if the object is no longer attached, discard its backing storage */
3801 	if (obj->mm.madv == I915_MADV_DONTNEED &&
3802 	    !i915_gem_object_has_pages(obj))
3803 		__i915_gem_object_truncate(obj);
3804 
3805 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
3806 	mutex_unlock(&obj->mm.lock);
3807 
3808 out:
3809 	i915_gem_object_put(obj);
3810 	return err;
3811 }
3812 
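/*
 * Retirement callback for the last GPU (CS) write to a frontbuffer object:
 * signal a frontbuffer flush now that the write has completed.
 */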
3813 static void
3814 frontbuffer_retire(struct i915_active_request *active,
3815 		   struct i915_request *request)
3816 {
3817 	struct drm_i915_gem_object *obj =
3818 		container_of(active, typeof(*obj), frontbuffer_write);
3819 
3820 	intel_fb_obj_flush(obj, ORIGIN_CS);
3821 }
3822 
3823 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3824 			  const struct drm_i915_gem_object_ops *ops)
3825 {
3826 	mutex_init(&obj->mm.lock);
3827 
3828 	spin_lock_init(&obj->vma.lock);
3829 	INIT_LIST_HEAD(&obj->vma.list);
3830 
3831 	INIT_LIST_HEAD(&obj->lut_list);
3832 	INIT_LIST_HEAD(&obj->batch_pool_link);
3833 
3834 	init_rcu_head(&obj->rcu);
3835 
3836 	obj->ops = ops;
3837 
3838 	reservation_object_init(&obj->__builtin_resv);
3839 	obj->resv = &obj->__builtin_resv;
3840 
3841 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3842 	i915_active_request_init(&obj->frontbuffer_write,
3843 				 NULL, frontbuffer_retire);
3844 
3845 	obj->mm.madv = I915_MADV_WILLNEED;
3846 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
3847 	mutex_init(&obj->mm.get_page.lock);
3848 
3849 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3850 }
3851 
3852 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3853 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
3854 		 I915_GEM_OBJECT_IS_SHRINKABLE,
3855 
3856 	.get_pages = i915_gem_object_get_pages_gtt,
3857 	.put_pages = i915_gem_object_put_pages_gtt,
3858 
3859 	.pwrite = i915_gem_object_pwrite_gtt,
3860 };
3861 
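/*
 * Back the GEM object with shmemfs, preferring our private gemfs mount
 * (set up by i915_gemfs_init()) when available and falling back to the
 * system tmpfs otherwise.
 */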
3862 static int i915_gem_object_create_shmem(struct drm_device *dev,
3863 					struct drm_gem_object *obj,
3864 					size_t size)
3865 {
3866 	struct drm_i915_private *i915 = to_i915(dev);
3867 	unsigned long flags = VM_NORESERVE;
3868 	struct file *filp;
3869 
3870 	drm_gem_private_object_init(dev, obj, size);
3871 
3872 	if (i915->mm.gemfs)
3873 		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
3874 						 flags);
3875 	else
3876 		filp = shmem_file_setup("i915", size, flags);
3877 
3878 	if (IS_ERR(filp))
3879 		return PTR_ERR(filp);
3880 
3881 	obj->filp = filp;
3882 
3883 	return 0;
3884 }
3885 
3886 struct drm_i915_gem_object *
3887 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
3888 {
3889 	struct drm_i915_gem_object *obj;
3890 	struct address_space *mapping;
3891 	unsigned int cache_level;
3892 	gfp_t mask;
3893 	int ret;
3894 
3895 	/* There is a prevalence of the assumption that we fit the object's
3896 	 * page count inside a 32bit _signed_ variable. Let's document this and
3897 	 * catch if we ever need to fix it. In the meantime, if you do spot
3898 	 * such a local variable, please consider fixing!
3899 	 */
3900 	if (size >> PAGE_SHIFT > INT_MAX)
3901 		return ERR_PTR(-E2BIG);
3902 
3903 	if (overflows_type(size, obj->base.size))
3904 		return ERR_PTR(-E2BIG);
3905 
3906 	obj = i915_gem_object_alloc();
3907 	if (obj == NULL)
3908 		return ERR_PTR(-ENOMEM);
3909 
3910 	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
3911 	if (ret)
3912 		goto fail;
3913 
3914 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3915 	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
3916 		/* 965gm cannot relocate objects above 4GiB. */
3917 		mask &= ~__GFP_HIGHMEM;
3918 		mask |= __GFP_DMA32;
3919 	}
3920 
3921 	mapping = obj->base.filp->f_mapping;
3922 	mapping_set_gfp_mask(mapping, mask);
3923 	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
3924 
3925 	i915_gem_object_init(obj, &i915_gem_object_ops);
3926 
3927 	obj->write_domain = I915_GEM_DOMAIN_CPU;
3928 	obj->read_domains = I915_GEM_DOMAIN_CPU;
3929 
3930 	if (HAS_LLC(dev_priv))
3931 		/* On some devices, we can have the GPU use the LLC (the CPU
3932 		 * cache) for about a 10% performance improvement
3933 		 * compared to uncached.  Graphics requests other than
3934 		 * display scanout are coherent with the CPU in
3935 		 * accessing this cache.  This means in this mode we
3936 		 * don't need to clflush on the CPU side, and on the
3937 		 * GPU side we only need to flush internal caches to
3938 		 * get data visible to the CPU.
3939 		 *
3940 		 * However, we maintain the display planes as UC, and so
3941 		 * need to rebind when first used as such.
3942 		 */
3943 		cache_level = I915_CACHE_LLC;
3944 	else
3945 		cache_level = I915_CACHE_NONE;
3946 
3947 	i915_gem_object_set_cache_coherency(obj, cache_level);
3948 
3949 	trace_i915_gem_object_create(obj);
3950 
3951 	return obj;
3952 
3953 fail:
3954 	i915_gem_object_free(obj);
3955 	return ERR_PTR(ret);
3956 }
3957 
3958 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3959 {
3960 	/* If we are the last user of the backing storage (be it shmemfs
3961 	 * pages or stolen etc), we know that the pages are going to be
3962 	 * immediately released. In this case, we can then skip copying
3963 	 * back the contents from the GPU.
3964 	 */
3965 
3966 	if (obj->mm.madv != I915_MADV_WILLNEED)
3967 		return false;
3968 
3969 	if (obj->base.filp == NULL)
3970 		return true;
3971 
3972 	/* At first glance, this looks racy, but then again so would be
3973 	 * userspace racing mmap against close. However, the first external
3974 	 * reference to the filp can only be obtained through the
3975 	 * i915_gem_mmap_ioctl() which safeguards us against the user
3976 	 * acquiring such a reference whilst we are in the middle of
3977 	 * freeing the object.
3978 	 */
3979 	return file_count(obj->base.filp) == 1;
3980 }
3981 
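/*
 * Final release for a batch of objects pulled off the freed llist: unbind
 * any remaining VMA under struct_mutex, drop the backing pages and release
 * the DRM/GEM bookkeeping, all while holding a runtime-pm wakeref.
 */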
3982 static void __i915_gem_free_objects(struct drm_i915_private *i915,
3983 				    struct llist_node *freed)
3984 {
3985 	struct drm_i915_gem_object *obj, *on;
3986 	intel_wakeref_t wakeref;
3987 
3988 	wakeref = intel_runtime_pm_get(i915);
3989 	llist_for_each_entry_safe(obj, on, freed, freed) {
3990 		struct i915_vma *vma, *vn;
3991 
3992 		trace_i915_gem_object_destroy(obj);
3993 
3994 		mutex_lock(&i915->drm.struct_mutex);
3995 
3996 		GEM_BUG_ON(i915_gem_object_is_active(obj));
3997 		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
3998 			GEM_BUG_ON(i915_vma_is_active(vma));
3999 			vma->flags &= ~I915_VMA_PIN_MASK;
4000 			i915_vma_destroy(vma);
4001 		}
4002 		GEM_BUG_ON(!list_empty(&obj->vma.list));
4003 		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4004 
4005 		/* This serializes freeing with the shrinker. Since the free
4006 		 * is delayed, first by RCU then by the workqueue, we want the
4007 		 * shrinker to be able to free pages of unreferenced objects,
4008 		 * or else we may oom whilst there are plenty of deferred
4009 		 * freed objects.
4010 		 */
4011 		if (i915_gem_object_has_pages(obj)) {
4012 			spin_lock(&i915->mm.obj_lock);
4013 			list_del_init(&obj->mm.link);
4014 			spin_unlock(&i915->mm.obj_lock);
4015 		}
4016 
4017 		mutex_unlock(&i915->drm.struct_mutex);
4018 
4019 		GEM_BUG_ON(obj->bind_count);
4020 		GEM_BUG_ON(obj->userfault_count);
4021 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4022 		GEM_BUG_ON(!list_empty(&obj->lut_list));
4023 
4024 		if (obj->ops->release)
4025 			obj->ops->release(obj);
4026 
4027 		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4028 			atomic_set(&obj->mm.pages_pin_count, 0);
4029 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4030 		GEM_BUG_ON(i915_gem_object_has_pages(obj));
4031 
4032 		if (obj->base.import_attach)
4033 			drm_prime_gem_destroy(&obj->base, NULL);
4034 
4035 		reservation_object_fini(&obj->__builtin_resv);
4036 		drm_gem_object_release(&obj->base);
4037 		i915_gem_info_remove_obj(i915, obj->base.size);
4038 
4039 		bitmap_free(obj->bit_17);
4040 		i915_gem_object_free(obj);
4041 
4042 		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4043 		atomic_dec(&i915->mm.free_count);
4044 
4045 		if (on)
4046 			cond_resched();
4047 	}
4048 	intel_runtime_pm_put(i915, wakeref);
4049 }
4050 
4051 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4052 {
4053 	struct llist_node *freed;
4054 
4055 	/* Free the oldest, most stale object to keep the free_list short */
4056 	freed = NULL;
4057 	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4058 		/* Only one consumer of llist_del_first() allowed */
4059 		spin_lock(&i915->mm.free_lock);
4060 		freed = llist_del_first(&i915->mm.free_list);
4061 		spin_unlock(&i915->mm.free_lock);
4062 	}
4063 	if (unlikely(freed)) {
4064 		freed->next = NULL;
4065 		__i915_gem_free_objects(i915, freed);
4066 	}
4067 }
4068 
4069 static void __i915_gem_free_work(struct work_struct *work)
4070 {
4071 	struct drm_i915_private *i915 =
4072 		container_of(work, struct drm_i915_private, mm.free_work);
4073 	struct llist_node *freed;
4074 
4075 	/*
4076 	 * All file-owned VMA should have been released by this point through
4077 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4078 	 * However, the object may also be bound into the global GTT (e.g.
4079 	 * older GPUs without per-process support, or for direct access through
4080 	 * the GTT either for the user or for scanout). Those VMA still need to
4081 	 * be unbound now.
4082 	 */
4083 
4084 	spin_lock(&i915->mm.free_lock);
4085 	while ((freed = llist_del_all(&i915->mm.free_list))) {
4086 		spin_unlock(&i915->mm.free_lock);
4087 
4088 		__i915_gem_free_objects(i915, freed);
4089 		if (need_resched())
4090 			return;
4091 
4092 		spin_lock(&i915->mm.free_lock);
4093 	}
4094 	spin_unlock(&i915->mm.free_lock);
4095 }
4096 
4097 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4098 {
4099 	struct drm_i915_gem_object *obj =
4100 		container_of(head, typeof(*obj), rcu);
4101 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
4102 
4103 	/*
4104 	 * We reuse obj->rcu for the freed list, so we had better not treat
4105 	 * it like a rcu_head from this point forwards. And we expect all
4106 	 * objects to be freed via this path.
4107 	 */
4108 	destroy_rcu_head(&obj->rcu);
4109 
4110 	/*
4111 	 * Since we require blocking on struct_mutex to unbind the freed
4112 	 * object from the GPU before releasing resources back to the
4113 	 * system, we can not do that directly from the RCU callback (which may
4114 	 * be a softirq context), but must instead defer that work onto a
4115 	 * workqueue. We use the RCU callback rather than move the freed object
4116 	 * directly onto the work queue so that we can mix between using the
4117 	 * worker and performing frees directly from subsequent allocations for
4118 	 * crude but effective memory throttling.
4119 	 */
4120 	if (llist_add(&obj->freed, &i915->mm.free_list))
4121 		queue_work(i915->wq, &i915->mm.free_work);
4122 }
4123 
4124 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4125 {
4126 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4127 
4128 	if (obj->mm.quirked)
4129 		__i915_gem_object_unpin_pages(obj);
4130 
4131 	if (discard_backing_storage(obj))
4132 		obj->mm.madv = I915_MADV_DONTNEED;
4133 
4134 	/*
4135 	 * Before we free the object, make sure any pure RCU-only
4136 	 * read-side critical sections are complete, e.g.
4137 	 * i915_gem_busy_ioctl(). For the corresponding synchronized
4138 	 * lookup see i915_gem_object_lookup_rcu().
4139 	 */
4140 	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4141 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4142 }
4143 
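/*
 * Drop the caller's reference, but if the object is still active on the
 * GPU (and does not already carry an active reference) hand the reference
 * over to be released upon retirement instead of freeing it now.
 */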
4144 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4145 {
4146 	lockdep_assert_held(&obj->base.dev->struct_mutex);
4147 
4148 	if (!i915_gem_object_has_active_reference(obj) &&
4149 	    i915_gem_object_is_active(obj))
4150 		i915_gem_object_set_active_reference(obj);
4151 	else
4152 		i915_gem_object_put(obj);
4153 }
4154 
4155 void i915_gem_sanitize(struct drm_i915_private *i915)
4156 {
4157 	intel_wakeref_t wakeref;
4158 
4159 	GEM_TRACE("\n");
4160 
4161 	wakeref = intel_runtime_pm_get(i915);
4162 	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4163 
4164 	/*
4165 	 * As we have just resumed the machine and woken the device up from
4166 	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4167 	 * back to defaults, recovering from whatever wedged state we left it
4168 	 * in and so worth trying to use the device once more.
4169 	 */
4170 	if (i915_terminally_wedged(i915))
4171 		i915_gem_unset_wedged(i915);
4172 
4173 	/*
4174 	 * If we inherit context state from the BIOS or earlier occupants
4175 	 * of the GPU, the GPU may be in an inconsistent state when we
4176 	 * try to take over. The only way to remove the earlier state
4177 	 * is by resetting. However, resetting on earlier gen is tricky as
4178 	 * it may impact the display and we are uncertain about the stability
4179 	 * of the reset, so this could be applied to even earlier gen.
4180 	 */
4181 	intel_gt_sanitize(i915, false);
4182 
4183 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4184 	intel_runtime_pm_put(i915, wakeref);
4185 
4186 	mutex_lock(&i915->drm.struct_mutex);
4187 	i915_gem_contexts_lost(i915);
4188 	mutex_unlock(&i915->drm.struct_mutex);
4189 }
4190 
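/*
 * Program the display and GT tiling/arbiter registers to enable bit-6
 * swizzling of tiled surfaces; a no-op before gen5 or when X-tiling uses
 * no bit-6 swizzle.
 */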
4191 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4192 {
4193 	if (INTEL_GEN(dev_priv) < 5 ||
4194 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4195 		return;
4196 
4197 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4198 				 DISP_TILE_SURFACE_SWIZZLING);
4199 
4200 	if (IS_GEN(dev_priv, 5))
4201 		return;
4202 
4203 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4204 	if (IS_GEN(dev_priv, 6))
4205 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4206 	else if (IS_GEN(dev_priv, 7))
4207 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4208 	else if (IS_GEN(dev_priv, 8))
4209 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4210 	else
4211 		BUG();
4212 }
4213 
4214 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4215 {
4216 	I915_WRITE(RING_CTL(base), 0);
4217 	I915_WRITE(RING_HEAD(base), 0);
4218 	I915_WRITE(RING_TAIL(base), 0);
4219 	I915_WRITE(RING_START(base), 0);
4220 }
4221 
4222 static void init_unused_rings(struct drm_i915_private *dev_priv)
4223 {
4224 	if (IS_I830(dev_priv)) {
4225 		init_unused_ring(dev_priv, PRB1_BASE);
4226 		init_unused_ring(dev_priv, SRB0_BASE);
4227 		init_unused_ring(dev_priv, SRB1_BASE);
4228 		init_unused_ring(dev_priv, SRB2_BASE);
4229 		init_unused_ring(dev_priv, SRB3_BASE);
4230 	} else if (IS_GEN(dev_priv, 2)) {
4231 		init_unused_ring(dev_priv, SRB0_BASE);
4232 		init_unused_ring(dev_priv, SRB1_BASE);
4233 	} else if (IS_GEN(dev_priv, 3)) {
4234 		init_unused_ring(dev_priv, PRB1_BASE);
4235 		init_unused_ring(dev_priv, PRB2_BASE);
4236 	}
4237 }
4238 
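/*
 * Bring the GPU back to a known state after reset or resume: re-apply the
 * GT workarounds and swizzling, quiesce the unused rings, re-enable PPGTT,
 * WOPCM and the microcontrollers, then resume the engines so that queued
 * requests can be replayed.
 */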
4239 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4240 {
4241 	int ret;
4242 
4243 	dev_priv->gt.last_init_time = ktime_get();
4244 
4245 	/* Double layer security blanket, see i915_gem_init() */
4246 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4247 
4248 	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4249 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4250 
4251 	if (IS_HASWELL(dev_priv))
4252 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4253 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4254 
4255 	/* Apply the GT workarounds... */
4256 	intel_gt_apply_workarounds(dev_priv);
4257 	/* ...and determine whether they are sticking. */
4258 	intel_gt_verify_workarounds(dev_priv, "init");
4259 
4260 	i915_gem_init_swizzling(dev_priv);
4261 
4262 	/*
4263 	 * At least 830 can leave some of the unused rings
4264 	 * "active" (i.e. head != tail) after resume, which
4265 	 * will prevent C3 entry. Make sure all unused rings
4266 	 * are totally idle.
4267 	 */
4268 	init_unused_rings(dev_priv);
4269 
4270 	BUG_ON(!dev_priv->kernel_context);
4271 	ret = i915_terminally_wedged(dev_priv);
4272 	if (ret)
4273 		goto out;
4274 
4275 	ret = i915_ppgtt_init_hw(dev_priv);
4276 	if (ret) {
4277 		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4278 		goto out;
4279 	}
4280 
4281 	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4282 	if (ret) {
4283 		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4284 		goto out;
4285 	}
4286 
4287 	/* We can't enable contexts until all firmware is loaded */
4288 	ret = intel_uc_init_hw(dev_priv);
4289 	if (ret) {
4290 		DRM_ERROR("Enabling uc failed (%d)\n", ret);
4291 		goto out;
4292 	}
4293 
4294 	intel_mocs_init_l3cc_table(dev_priv);
4295 
4296 	/* Only when the HW is re-initialised, can we replay the requests */
4297 	ret = intel_engines_resume(dev_priv);
4298 	if (ret)
4299 		goto cleanup_uc;
4300 
4301 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4302 
4303 	intel_engines_set_scheduler_caps(dev_priv);
4304 	return 0;
4305 
4306 cleanup_uc:
4307 	intel_uc_fini_hw(dev_priv);
4308 out:
4309 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4310 
4311 	return ret;
4312 }
4313 
4314 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4315 {
4316 	struct intel_engine_cs *engine;
4317 	struct i915_gem_context *ctx;
4318 	struct i915_gem_engines *e;
4319 	enum intel_engine_id id;
4320 	int err = 0;
4321 
4322 	/*
4323 	 * As we reset the GPU during very early sanitisation, the current
4324 	 * register state on the GPU should reflect its default values.
4325 	 * We load a context onto the hw (with restore-inhibit), then switch
4326 	 * over to a second context to save that default register state. We
4327 	 * can then prime every new context with that state so they all start
4328 	 * from the same default HW values.
4329 	 */
4330 
4331 	ctx = i915_gem_context_create_kernel(i915, 0);
4332 	if (IS_ERR(ctx))
4333 		return PTR_ERR(ctx);
4334 
4335 	e = i915_gem_context_lock_engines(ctx);
4336 
4337 	for_each_engine(engine, i915, id) {
4338 		struct intel_context *ce = e->engines[id];
4339 		struct i915_request *rq;
4340 
4341 		rq = intel_context_create_request(ce);
4342 		if (IS_ERR(rq)) {
4343 			err = PTR_ERR(rq);
4344 			goto err_active;
4345 		}
4346 
4347 		err = 0;
4348 		if (rq->engine->init_context)
4349 			err = rq->engine->init_context(rq);
4350 
4351 		i915_request_add(rq);
4352 		if (err)
4353 			goto err_active;
4354 	}
4355 
4356 	/* Flush the default context image to memory, and enable powersaving. */
4357 	if (!i915_gem_load_power_context(i915)) {
4358 		err = -EIO;
4359 		goto err_active;
4360 	}
4361 
4362 	for_each_engine(engine, i915, id) {
4363 		struct intel_context *ce = e->engines[id];
4364 		struct i915_vma *state = ce->state;
4365 		void *vaddr;
4366 
4367 		if (!state)
4368 			continue;
4369 
4370 		GEM_BUG_ON(intel_context_is_pinned(ce));
4371 
4372 		/*
4373 		 * As we will hold a reference to the logical state, it will
4374 		 * not be torn down with the context, and importantly the
4375 		 * object will hold onto its vma (making it possible for a
4376 		 * stray GTT write to corrupt our defaults). Unmap the vma
4377 		 * from the GTT to prevent such accidents and reclaim the
4378 		 * space.
4379 		 */
4380 		err = i915_vma_unbind(state);
4381 		if (err)
4382 			goto err_active;
4383 
4384 		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4385 		if (err)
4386 			goto err_active;
4387 
4388 		engine->default_state = i915_gem_object_get(state->obj);
4389 		i915_gem_object_set_cache_coherency(engine->default_state,
4390 						    I915_CACHE_LLC);
4391 
4392 		/* Check we can acquire the image of the context state */
4393 		vaddr = i915_gem_object_pin_map(engine->default_state,
4394 						I915_MAP_FORCE_WB);
4395 		if (IS_ERR(vaddr)) {
4396 			err = PTR_ERR(vaddr);
4397 			goto err_active;
4398 		}
4399 
4400 		i915_gem_object_unpin_map(engine->default_state);
4401 	}
4402 
4403 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4404 		unsigned int found = intel_engines_has_context_isolation(i915);
4405 
4406 		/*
4407 		 * Make sure that classes with multiple engine instances all
4408 		 * share the same basic configuration.
4409 		 */
4410 		for_each_engine(engine, i915, id) {
4411 			unsigned int bit = BIT(engine->uabi_class);
4412 			unsigned int expected = engine->default_state ? bit : 0;
4413 
4414 			if ((found & bit) != expected) {
4415 				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4416 					  engine->uabi_class, engine->name);
4417 			}
4418 		}
4419 	}
4420 
4421 out_ctx:
4422 	i915_gem_context_unlock_engines(ctx);
4423 	i915_gem_context_set_closed(ctx);
4424 	i915_gem_context_put(ctx);
4425 	return err;
4426 
4427 err_active:
4428 	/*
4429 	 * If we have to abandon now, we expect the engines to be idle
4430 	 * and ready to be torn-down. The quickest way we can accomplish
4431 	 * this is by declaring ourselves wedged.
4432 	 */
4433 	i915_gem_set_wedged(i915);
4434 	goto out_ctx;
4435 }
4436 
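/*
 * Allocate the global GT scratch page, preferring stolen memory with an
 * internal object as fallback, and pin it high in the GGTT.
 */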
4437 static int
4438 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4439 {
4440 	struct drm_i915_gem_object *obj;
4441 	struct i915_vma *vma;
4442 	int ret;
4443 
4444 	obj = i915_gem_object_create_stolen(i915, size);
4445 	if (!obj)
4446 		obj = i915_gem_object_create_internal(i915, size);
4447 	if (IS_ERR(obj)) {
4448 		DRM_ERROR("Failed to allocate scratch page\n");
4449 		return PTR_ERR(obj);
4450 	}
4451 
4452 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4453 	if (IS_ERR(vma)) {
4454 		ret = PTR_ERR(vma);
4455 		goto err_unref;
4456 	}
4457 
4458 	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4459 	if (ret)
4460 		goto err_unref;
4461 
4462 	i915->gt.scratch = vma;
4463 	return 0;
4464 
4465 err_unref:
4466 	i915_gem_object_put(obj);
4467 	return ret;
4468 }
4469 
4470 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4471 {
4472 	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4473 }
4474 
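/*
 * Re-check that the per-engine workarounds are still in place after load;
 * returns immediately unless CONFIG_DRM_I915_DEBUG_GEM is enabled.
 */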
4475 static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
4476 {
4477 	struct intel_engine_cs *engine;
4478 	enum intel_engine_id id;
4479 	int err = 0;
4480 
4481 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4482 		return 0;
4483 
4484 	for_each_engine(engine, i915, id) {
4485 		if (intel_engine_verify_workarounds(engine, "load"))
4486 			err = -EIO;
4487 	}
4488 
4489 	return err;
4490 }
4491 
4492 int i915_gem_init(struct drm_i915_private *dev_priv)
4493 {
4494 	int ret;
4495 
4496 	/* We need to fall back to 4K pages if the host doesn't support huge GTT. */
4497 	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
4498 		mkwrite_device_info(dev_priv)->page_sizes =
4499 			I915_GTT_PAGE_SIZE_4K;
4500 
4501 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4502 
4503 	i915_timelines_init(dev_priv);
4504 
4505 	ret = i915_gem_init_userptr(dev_priv);
4506 	if (ret)
4507 		return ret;
4508 
4509 	ret = intel_uc_init_misc(dev_priv);
4510 	if (ret)
4511 		return ret;
4512 
4513 	ret = intel_wopcm_init(&dev_priv->wopcm);
4514 	if (ret)
4515 		goto err_uc_misc;
4516 
4517 	/* This is just a security blanket to placate dragons.
4518 	 * On some systems, we very sporadically observe that the first TLBs
4519 	 * used by the CS may be stale, despite us poking the TLB reset. If
4520 	 * we hold the forcewake during initialisation these problems
4521 	 * just magically go away.
4522 	 */
4523 	mutex_lock(&dev_priv->drm.struct_mutex);
4524 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4525 
4526 	ret = i915_gem_init_ggtt(dev_priv);
4527 	if (ret) {
4528 		GEM_BUG_ON(ret == -EIO);
4529 		goto err_unlock;
4530 	}
4531 
4532 	ret = i915_gem_init_scratch(dev_priv,
4533 				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
4534 	if (ret) {
4535 		GEM_BUG_ON(ret == -EIO);
4536 		goto err_ggtt;
4537 	}
4538 
4539 	ret = intel_engines_setup(dev_priv);
4540 	if (ret) {
4541 		GEM_BUG_ON(ret == -EIO);
4542 		goto err_unlock;
4543 	}
4544 
4545 	ret = i915_gem_contexts_init(dev_priv);
4546 	if (ret) {
4547 		GEM_BUG_ON(ret == -EIO);
4548 		goto err_scratch;
4549 	}
4550 
4551 	ret = intel_engines_init(dev_priv);
4552 	if (ret) {
4553 		GEM_BUG_ON(ret == -EIO);
4554 		goto err_context;
4555 	}
4556 
4557 	intel_init_gt_powersave(dev_priv);
4558 
4559 	ret = intel_uc_init(dev_priv);
4560 	if (ret)
4561 		goto err_pm;
4562 
4563 	ret = i915_gem_init_hw(dev_priv);
4564 	if (ret)
4565 		goto err_uc_init;
4566 
4567 	/*
4568 	 * Despite its name, intel_init_clock_gating applies display clock
4569 	 * gating workarounds, GT mmio workarounds and the occasional
4570 	 * GT power context workaround. Worse, sometimes it includes a context
4571 	 * register workaround which we need to apply before we record the
4572 	 * default HW state for all contexts.
4573 	 *
4574 	 * FIXME: break up the workarounds and apply them at the right time!
4575 	 */
4576 	intel_init_clock_gating(dev_priv);
4577 
4578 	ret = intel_engines_verify_workarounds(dev_priv);
4579 	if (ret)
4580 		goto err_init_hw;
4581 
4582 	ret = __intel_engines_record_defaults(dev_priv);
4583 	if (ret)
4584 		goto err_init_hw;
4585 
4586 	if (i915_inject_load_failure()) {
4587 		ret = -ENODEV;
4588 		goto err_init_hw;
4589 	}
4590 
4591 	if (i915_inject_load_failure()) {
4592 		ret = -EIO;
4593 		goto err_init_hw;
4594 	}
4595 
4596 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4597 	mutex_unlock(&dev_priv->drm.struct_mutex);
4598 
4599 	return 0;
4600 
4601 	/*
4602 	 * Unwinding is complicated by the fact that we want -EIO to mean
4603 	 * disable GPU submission but keep KMS alive. We want to mark the
4604 	 * HW as irreversibly wedged, but keep enough state around that the
4605 	 * driver doesn't explode during runtime.
4606 	 */
4607 err_init_hw:
4608 	mutex_unlock(&dev_priv->drm.struct_mutex);
4609 
4610 	i915_gem_set_wedged(dev_priv);
4611 	i915_gem_suspend(dev_priv);
4612 	i915_gem_suspend_late(dev_priv);
4613 
4614 	i915_gem_drain_workqueue(dev_priv);
4615 
4616 	mutex_lock(&dev_priv->drm.struct_mutex);
4617 	intel_uc_fini_hw(dev_priv);
4618 err_uc_init:
4619 	intel_uc_fini(dev_priv);
4620 err_pm:
4621 	if (ret != -EIO) {
4622 		intel_cleanup_gt_powersave(dev_priv);
4623 		intel_engines_cleanup(dev_priv);
4624 	}
4625 err_context:
4626 	if (ret != -EIO)
4627 		i915_gem_contexts_fini(dev_priv);
4628 err_scratch:
4629 	i915_gem_fini_scratch(dev_priv);
4630 err_ggtt:
4631 err_unlock:
4632 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4633 	mutex_unlock(&dev_priv->drm.struct_mutex);
4634 
4635 err_uc_misc:
4636 	intel_uc_fini_misc(dev_priv);
4637 
4638 	if (ret != -EIO) {
4639 		i915_gem_cleanup_userptr(dev_priv);
4640 		i915_timelines_fini(dev_priv);
4641 	}
4642 
4643 	if (ret == -EIO) {
4644 		mutex_lock(&dev_priv->drm.struct_mutex);
4645 
4646 		/*
4647 		 * Allow engine initialisation to fail by marking the GPU as
4648 		 * wedged. But we only want to do this where the GPU is angry;
4649 		 * for all other failures, such as an allocation failure, bail.
4650 		 */
4651 		if (!i915_reset_failed(dev_priv)) {
4652 			i915_load_error(dev_priv,
4653 					"Failed to initialize GPU, declaring it wedged!\n");
4654 			i915_gem_set_wedged(dev_priv);
4655 		}
4656 
4657 		/* Minimal basic recovery for KMS */
4658 		ret = i915_ggtt_enable_hw(dev_priv);
4659 		i915_gem_restore_gtt_mappings(dev_priv);
4660 		i915_gem_restore_fences(dev_priv);
4661 		intel_init_clock_gating(dev_priv);
4662 
4663 		mutex_unlock(&dev_priv->drm.struct_mutex);
4664 	}
4665 
4666 	i915_gem_drain_freed_objects(dev_priv);
4667 	return ret;
4668 }
4669 
4670 void i915_gem_fini(struct drm_i915_private *dev_priv)
4671 {
4672 	GEM_BUG_ON(dev_priv->gt.awake);
4673 
4674 	i915_gem_suspend_late(dev_priv);
4675 	intel_disable_gt_powersave(dev_priv);
4676 
4677 	/* Flush any outstanding unpin_work. */
4678 	i915_gem_drain_workqueue(dev_priv);
4679 
4680 	mutex_lock(&dev_priv->drm.struct_mutex);
4681 	intel_uc_fini_hw(dev_priv);
4682 	intel_uc_fini(dev_priv);
4683 	intel_engines_cleanup(dev_priv);
4684 	i915_gem_contexts_fini(dev_priv);
4685 	i915_gem_fini_scratch(dev_priv);
4686 	mutex_unlock(&dev_priv->drm.struct_mutex);
4687 
4688 	intel_wa_list_free(&dev_priv->gt_wa_list);
4689 
4690 	intel_cleanup_gt_powersave(dev_priv);
4691 
4692 	intel_uc_fini_misc(dev_priv);
4693 	i915_gem_cleanup_userptr(dev_priv);
4694 	i915_timelines_fini(dev_priv);
4695 
4696 	i915_gem_drain_freed_objects(dev_priv);
4697 
4698 	WARN_ON(!list_empty(&dev_priv->contexts.list));
4699 }
4700 
4701 void i915_gem_init_mmio(struct drm_i915_private *i915)
4702 {
4703 	i915_gem_sanitize(i915);
4704 }
4705 
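/*
 * Size the fence register pool for the platform (or take the count the
 * vGPU host advertises when running virtualised), initialise each fence
 * and restore the hardware state, then detect the bit-6 swizzle pattern.
 */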
4706 void
4707 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4708 {
4709 	int i;
4710 
4711 	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4712 	    !IS_CHERRYVIEW(dev_priv))
4713 		dev_priv->num_fence_regs = 32;
4714 	else if (INTEL_GEN(dev_priv) >= 4 ||
4715 		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
4716 		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
4717 		dev_priv->num_fence_regs = 16;
4718 	else
4719 		dev_priv->num_fence_regs = 8;
4720 
4721 	if (intel_vgpu_active(dev_priv))
4722 		dev_priv->num_fence_regs =
4723 				I915_READ(vgtif_reg(avail_rs.fence_num));
4724 
4725 	/* Initialize fence registers to zero */
4726 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4727 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4728 
4729 		fence->i915 = dev_priv;
4730 		fence->id = i;
4731 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4732 	}
4733 	i915_gem_restore_fences(dev_priv);
4734 
4735 	i915_gem_detect_bit_6_swizzle(dev_priv);
4736 }
4737 
4738 static void i915_gem_init__mm(struct drm_i915_private *i915)
4739 {
4740 	spin_lock_init(&i915->mm.object_stat_lock);
4741 	spin_lock_init(&i915->mm.obj_lock);
4742 	spin_lock_init(&i915->mm.free_lock);
4743 
4744 	init_llist_head(&i915->mm.free_list);
4745 
4746 	INIT_LIST_HEAD(&i915->mm.unbound_list);
4747 	INIT_LIST_HEAD(&i915->mm.bound_list);
4748 	INIT_LIST_HEAD(&i915->mm.fence_list);
4749 	INIT_LIST_HEAD(&i915->mm.userfault_list);
4750 
4751 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
4752 }
4753 
4754 int i915_gem_init_early(struct drm_i915_private *dev_priv)
4755 {
4756 	int err;
4757 
4758 	intel_gt_pm_init(dev_priv);
4759 
4760 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
4761 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
4762 
4763 	i915_gem_init__mm(dev_priv);
4764 	i915_gem_init__pm(dev_priv);
4765 
4766 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4767 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4768 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
4769 	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4770 
4771 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4772 
4773 	spin_lock_init(&dev_priv->fb_tracking.lock);
4774 
4775 	err = i915_gemfs_init(dev_priv);
4776 	if (err)
4777 		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);
4778 
4779 	return 0;
4780 }
4781 
4782 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
4783 {
4784 	i915_gem_drain_freed_objects(dev_priv);
4785 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
4786 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
4787 	WARN_ON(dev_priv->mm.object_count);
4788 
4789 	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4790 
4791 	i915_gemfs_fini(dev_priv);
4792 }
4793 
4794 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4795 {
4796 	/* Discard all purgeable objects, let userspace recover those as
4797 	 * required after resuming.
4798 	 */
4799 	i915_gem_shrink_all(dev_priv);
4800 
4801 	return 0;
4802 }
4803 
4804 int i915_gem_freeze_late(struct drm_i915_private *i915)
4805 {
4806 	struct drm_i915_gem_object *obj;
4807 	struct list_head *phases[] = {
4808 		&i915->mm.unbound_list,
4809 		&i915->mm.bound_list,
4810 		NULL
4811 	}, **phase;
4812 
4813 	/*
4814 	 * Called just before we write the hibernation image.
4815 	 *
4816 	 * We need to update the domain tracking to reflect that the CPU
4817 	 * will be accessing all the pages to create and restore from the
4818 	 * hibernation, and so upon restoration those pages will be in the
4819 	 * CPU domain.
4820 	 *
4821 	 * To make sure the hibernation image contains the latest state,
4822 	 * we update that state just before writing out the image.
4823 	 *
4824 	 * To try to reduce the hibernation image, we manually shrink
4825 	 * the objects as well; see i915_gem_freeze().
4826 	 */
4827 
4828 	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
4829 	i915_gem_drain_freed_objects(i915);
4830 
4831 	mutex_lock(&i915->drm.struct_mutex);
4832 	for (phase = phases; *phase; phase++) {
4833 		list_for_each_entry(obj, *phase, mm.link)
4834 			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
4835 	}
4836 	mutex_unlock(&i915->drm.struct_mutex);
4837 
4838 	return 0;
4839 }
4840 
4841 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4842 {
4843 	struct drm_i915_file_private *file_priv = file->driver_priv;
4844 	struct i915_request *request;
4845 
4846 	/* Clean up our request list when the client is going away, so that
4847 	 * later retire_requests won't dereference our soon-to-be-gone
4848 	 * file_priv.
4849 	 */
4850 	spin_lock(&file_priv->mm.lock);
4851 	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
4852 		request->file_priv = NULL;
4853 	spin_unlock(&file_priv->mm.lock);
4854 }
4855 
4856 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
4857 {
4858 	struct drm_i915_file_private *file_priv;
4859 	int ret;
4860 
4861 	DRM_DEBUG("\n");
4862 
4863 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4864 	if (!file_priv)
4865 		return -ENOMEM;
4866 
4867 	file->driver_priv = file_priv;
4868 	file_priv->dev_priv = i915;
4869 	file_priv->file = file;
4870 
4871 	spin_lock_init(&file_priv->mm.lock);
4872 	INIT_LIST_HEAD(&file_priv->mm.request_list);
4873 
4874 	file_priv->bsd_engine = -1;
4875 	file_priv->hang_timestamp = jiffies;
4876 
4877 	ret = i915_gem_context_open(i915, file);
4878 	if (ret)
4879 		kfree(file_priv);
4880 
4881 	return ret;
4882 }
4883 
4884 /**
4885  * i915_gem_track_fb - update frontbuffer tracking
4886  * @old: current GEM buffer for the frontbuffer slots
4887  * @new: new GEM buffer for the frontbuffer slots
4888  * @frontbuffer_bits: bitmask of frontbuffer slots
4889  *
4890  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4891  * from @old and setting them in @new. Both @old and @new can be NULL.
4892  */
4893 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4894 		       struct drm_i915_gem_object *new,
4895 		       unsigned frontbuffer_bits)
4896 {
4897 	/* Control of individual bits within the mask is guarded by
4898 	 * the owning plane->mutex, i.e. we can never see concurrent
4899 	 * manipulation of individual bits. But since the bitfield as a whole
4900 	 * is updated using RMW, we need to use atomics in order to update
4901 	 * the bits.
4902 	 */
4903 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4904 		     BITS_PER_TYPE(atomic_t));
4905 
4906 	if (old) {
4907 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4908 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4909 	}
4910 
4911 	if (new) {
4912 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4913 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4914 	}
4915 }
4916 
4917 /* Allocate a new GEM object and fill it with the supplied data */
4918 struct drm_i915_gem_object *
4919 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
4920 			         const void *data, size_t size)
4921 {
4922 	struct drm_i915_gem_object *obj;
4923 	struct file *file;
4924 	size_t offset;
4925 	int err;
4926 
4927 	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
4928 	if (IS_ERR(obj))
4929 		return obj;
4930 
4931 	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
4932 
4933 	file = obj->base.filp;
4934 	offset = 0;
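	/*
	 * Copy the payload into the object a page at a time through the
	 * shmemfs pagecache, using write_begin/write_end around a kmap'd
	 * memcpy for each page.
	 */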
4935 	do {
4936 		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
4937 		struct page *page;
4938 		void *pgdata, *vaddr;
4939 
4940 		err = pagecache_write_begin(file, file->f_mapping,
4941 					    offset, len, 0,
4942 					    &page, &pgdata);
4943 		if (err < 0)
4944 			goto fail;
4945 
4946 		vaddr = kmap(page);
4947 		memcpy(vaddr, data, len);
4948 		kunmap(page);
4949 
4950 		err = pagecache_write_end(file, file->f_mapping,
4951 					  offset, len, len,
4952 					  page, pgdata);
4953 		if (err < 0)
4954 			goto fail;
4955 
4956 		size -= len;
4957 		data += len;
4958 		offset += len;
4959 	} while (size);
4960 
4961 	return obj;
4962 
4963 fail:
4964 	i915_gem_object_put(obj);
4965 	return ERR_PTR(err);
4966 }
4967 
4968 struct scatterlist *
4969 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4970 		       unsigned int n,
4971 		       unsigned int *offset)
4972 {
4973 	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4974 	struct scatterlist *sg;
4975 	unsigned int idx, count;
4976 
4977 	might_sleep();
4978 	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4979 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4980 
4981 	/* As we iterate forward through the sg, we record each entry in a
4982 	 * radixtree for quick repeated (backwards) lookups. If we have seen
4983 	 * this index previously, we will have an entry for it.
4984 	 *
4985 	 * Initial lookup is O(N), but this is amortized to O(1) for
4986 	 * sequential page access (where each new request is consecutive
4987 	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4988 	 * i.e. O(1) with a large constant!
4989 	 */
4990 	if (n < READ_ONCE(iter->sg_idx))
4991 		goto lookup;
4992 
4993 	mutex_lock(&iter->lock);
4994 
4995 	/* We prefer to reuse the last sg so that repeated lookups of this
4996 	 * (or the subsequent) sg are fast - comparing against the last
4997 	 * sg is faster than going through the radixtree.
4998 	 */
4999 
5000 	sg = iter->sg_pos;
5001 	idx = iter->sg_idx;
5002 	count = __sg_page_count(sg);
5003 
5004 	while (idx + count <= n) {
5005 		void *entry;
5006 		unsigned long i;
5007 		int ret;
5008 
5009 		/* If we cannot allocate and insert this entry, or the
5010 		 * individual pages from this range, cancel updating the
5011 		 * sg_idx so that on this lookup we are forced to linearly
5012 		 * scan onwards, but on future lookups we will try the
5013 		 * insertion again (in which case we need to be careful of
5014 		 * the error return reporting that we have already inserted
5015 		 * this index).
5016 		 */
5017 		ret = radix_tree_insert(&iter->radix, idx, sg);
5018 		if (ret && ret != -EEXIST)
5019 			goto scan;
5020 
5021 		entry = xa_mk_value(idx);
5022 		for (i = 1; i < count; i++) {
5023 			ret = radix_tree_insert(&iter->radix, idx + i, entry);
5024 			if (ret && ret != -EEXIST)
5025 				goto scan;
5026 		}
5027 
5028 		idx += count;
5029 		sg = ____sg_next(sg);
5030 		count = __sg_page_count(sg);
5031 	}
5032 
5033 scan:
5034 	iter->sg_pos = sg;
5035 	iter->sg_idx = idx;
5036 
5037 	mutex_unlock(&iter->lock);
5038 
5039 	if (unlikely(n < idx)) /* insertion completed by another thread */
5040 		goto lookup;
5041 
5042 	/* In case we failed to insert the entry into the radixtree, we need
5043 	 * to look beyond the current sg.
5044 	 */
5045 	while (idx + count <= n) {
5046 		idx += count;
5047 		sg = ____sg_next(sg);
5048 		count = __sg_page_count(sg);
5049 	}
5050 
5051 	*offset = n - idx;
5052 	return sg;
5053 
5054 lookup:
5055 	rcu_read_lock();
5056 
5057 	sg = radix_tree_lookup(&iter->radix, n);
5058 	GEM_BUG_ON(!sg);
5059 
5060 	/* If this index is in the middle of a multi-page sg entry,
5061 	 * the radix tree will contain a value entry that points
5062 	 * to the start of that range. We will return the pointer to
5063 	 * the base page and the offset of this page within the
5064 	 * sg entry's range.
5065 	 */
5066 	*offset = 0;
5067 	if (unlikely(xa_is_value(sg))) {
5068 		unsigned long base = xa_to_value(sg);
5069 
5070 		sg = radix_tree_lookup(&iter->radix, base);
5071 		GEM_BUG_ON(!sg);
5072 
5073 		*offset = n - base;
5074 	}
5075 
5076 	rcu_read_unlock();
5077 
5078 	return sg;
5079 }
5080 
5081 struct page *
5082 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5083 {
5084 	struct scatterlist *sg;
5085 	unsigned int offset;
5086 
5087 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5088 
5089 	sg = i915_gem_object_get_sg(obj, n, &offset);
5090 	return nth_page(sg_page(sg), offset);
5091 }
5092 
5093 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5094 struct page *
5095 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5096 			       unsigned int n)
5097 {
5098 	struct page *page;
5099 
5100 	page = i915_gem_object_get_page(obj, n);
5101 	if (!obj->mm.dirty)
5102 		set_page_dirty(page);
5103 
5104 	return page;
5105 }
5106 
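/*
 * Return the DMA address of page n within the object; if @len is supplied,
 * also report how many bytes remain in the containing sg entry from that
 * page onwards.
 */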
5107 dma_addr_t
5108 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
5109 				    unsigned long n,
5110 				    unsigned int *len)
5111 {
5112 	struct scatterlist *sg;
5113 	unsigned int offset;
5114 
5115 	sg = i915_gem_object_get_sg(obj, n, &offset);
5116 
5117 	if (len)
5118 		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
5119 
5120 	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5121 }
5122 
5123 dma_addr_t
5124 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5125 				unsigned long n)
5126 {
5127 	return i915_gem_object_get_dma_address_len(obj, n, NULL);
5128 }
5129 
5130 
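/*
 * Swap the object's backing over to i915_gem_phys_ops: unbind all VMA,
 * detach the current shmemfs pages, acquire a fresh set from the phys
 * backend and perma-pin them, then release the old pages.
 */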
5131 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5132 {
5133 	struct sg_table *pages;
5134 	int err;
5135 
5136 	if (align > obj->base.size)
5137 		return -EINVAL;
5138 
5139 	if (obj->ops == &i915_gem_phys_ops)
5140 		return 0;
5141 
5142 	if (obj->ops != &i915_gem_object_ops)
5143 		return -EINVAL;
5144 
5145 	err = i915_gem_object_unbind(obj);
5146 	if (err)
5147 		return err;
5148 
5149 	mutex_lock(&obj->mm.lock);
5150 
5151 	if (obj->mm.madv != I915_MADV_WILLNEED) {
5152 		err = -EFAULT;
5153 		goto err_unlock;
5154 	}
5155 
5156 	if (obj->mm.quirked) {
5157 		err = -EFAULT;
5158 		goto err_unlock;
5159 	}
5160 
5161 	if (obj->mm.mapping) {
5162 		err = -EBUSY;
5163 		goto err_unlock;
5164 	}
5165 
5166 	pages = __i915_gem_object_unset_pages(obj);
5167 
5168 	obj->ops = &i915_gem_phys_ops;
5169 
5170 	err = ____i915_gem_object_get_pages(obj);
5171 	if (err)
5172 		goto err_xfer;
5173 
5174 	/* Perma-pin (until release) the physical set of pages */
5175 	__i915_gem_object_pin_pages(obj);
5176 
5177 	if (!IS_ERR_OR_NULL(pages))
5178 		i915_gem_object_ops.put_pages(obj, pages);
5179 	mutex_unlock(&obj->mm.lock);
5180 	return 0;
5181 
5182 err_xfer:
5183 	obj->ops = &i915_gem_object_ops;
5184 	if (!IS_ERR_OR_NULL(pages)) {
5185 		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5186 
5187 		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5188 	}
5189 err_unlock:
5190 	mutex_unlock(&obj->mm.lock);
5191 	return err;
5192 }
5193 
5194 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5195 #include "selftests/scatterlist.c"
5196 #include "selftests/mock_gem_device.c"
5197 #include "selftests/huge_gem_object.c"
5198 #include "selftests/huge_pages.c"
5199 #include "selftests/i915_gem_object.c"
5200 #include "selftests/i915_gem_coherency.c"
5201 #include "selftests/i915_gem.c"
5202 #endif
5203