xref: /openbmc/linux/drivers/gpu/drm/i915/i915_gem.c (revision e4781421e883340b796da5a724bda7226817990b)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include "intel_frontbuffer.h"
36 #include "intel_mocs.h"
37 #include <linux/dma-fence-array.h>
38 #include <linux/reservation.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/slab.h>
41 #include <linux/swap.h>
42 #include <linux/pci.h>
43 #include <linux/dma-buf.h>
44 
45 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
48 
49 static bool cpu_cache_is_coherent(struct drm_device *dev,
50 				  enum i915_cache_level level)
51 {
52 	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
53 }
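
/*
 * Roughly, the rule encoded by cpu_cache_is_coherent() above: on platforms
 * with an LLC, CPU caches are coherent with GPU access regardless of the
 * object's cache_level; without an LLC, only snooped objects (cache_level
 * other than I915_CACHE_NONE) are coherent, and CPU writes to anything else
 * must be clflushed first. cpu_write_needs_clflush() below adds the display
 * exception: scanout (obj->pin_display) does not snoop the cache, so those
 * objects are flushed even on LLC platforms.
 */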
54 
55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
56 {
57 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
58 		return false;
59 
60 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
61 		return true;
62 
63 	return obj->pin_display;
64 }
65 
66 static int
67 insert_mappable_node(struct i915_ggtt *ggtt,
68                      struct drm_mm_node *node, u32 size)
69 {
70 	memset(node, 0, sizeof(*node));
71 	return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
72 						   size, 0, -1,
73 						   0, ggtt->mappable_end,
74 						   DRM_MM_SEARCH_DEFAULT,
75 						   DRM_MM_CREATE_DEFAULT);
76 }
77 
78 static void
79 remove_mappable_node(struct drm_mm_node *node)
80 {
81 	drm_mm_remove_node(node);
82 }
83 
84 /* some bookkeeping */
85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
86 				  u64 size)
87 {
88 	spin_lock(&dev_priv->mm.object_stat_lock);
89 	dev_priv->mm.object_count++;
90 	dev_priv->mm.object_memory += size;
91 	spin_unlock(&dev_priv->mm.object_stat_lock);
92 }
93 
94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
95 				     u64 size)
96 {
97 	spin_lock(&dev_priv->mm.object_stat_lock);
98 	dev_priv->mm.object_count--;
99 	dev_priv->mm.object_memory -= size;
100 	spin_unlock(&dev_priv->mm.object_stat_lock);
101 }
102 
103 static int
104 i915_gem_wait_for_error(struct i915_gpu_error *error)
105 {
106 	int ret;
107 
108 	might_sleep();
109 
110 	if (!i915_reset_in_progress(error))
111 		return 0;
112 
113 	/*
114 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
115 	 * userspace. If it takes that long, something really bad is going on and
116 	 * we should simply try to bail out and fail as gracefully as possible.
117 	 */
118 	ret = wait_event_interruptible_timeout(error->reset_queue,
119 					       !i915_reset_in_progress(error),
120 					       I915_RESET_TIMEOUT);
121 	if (ret == 0) {
122 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
123 		return -EIO;
124 	} else if (ret < 0) {
125 		return ret;
126 	} else {
127 		return 0;
128 	}
129 }
130 
131 int i915_mutex_lock_interruptible(struct drm_device *dev)
132 {
133 	struct drm_i915_private *dev_priv = to_i915(dev);
134 	int ret;
135 
136 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
137 	if (ret)
138 		return ret;
139 
140 	ret = mutex_lock_interruptible(&dev->struct_mutex);
141 	if (ret)
142 		return ret;
143 
144 	return 0;
145 }
146 
147 int
148 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
149 			    struct drm_file *file)
150 {
151 	struct drm_i915_private *dev_priv = to_i915(dev);
152 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
153 	struct drm_i915_gem_get_aperture *args = data;
154 	struct i915_vma *vma;
155 	size_t pinned;
156 
157 	pinned = 0;
158 	mutex_lock(&dev->struct_mutex);
159 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
160 		if (i915_vma_is_pinned(vma))
161 			pinned += vma->node.size;
162 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
163 		if (i915_vma_is_pinned(vma))
164 			pinned += vma->node.size;
165 	mutex_unlock(&dev->struct_mutex);
166 
167 	args->aper_size = ggtt->base.total;
168 	args->aper_available_size = args->aper_size - pinned;
169 
170 	return 0;
171 }
172 
173 static struct sg_table *
174 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
175 {
176 	struct address_space *mapping = obj->base.filp->f_mapping;
177 	drm_dma_handle_t *phys;
178 	struct sg_table *st;
179 	struct scatterlist *sg;
180 	char *vaddr;
181 	int i;
182 
183 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
184 		return ERR_PTR(-EINVAL);
185 
186 	/* Always aligning to the object size allows a single allocation
187 	 * to handle all possible callers, and given typical object sizes,
188 	 * the alignment of the buddy allocation will naturally match.
189 	 */
190 	phys = drm_pci_alloc(obj->base.dev,
191 			     obj->base.size,
192 			     roundup_pow_of_two(obj->base.size));
193 	if (!phys)
194 		return ERR_PTR(-ENOMEM);
195 
196 	vaddr = phys->vaddr;
197 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
198 		struct page *page;
199 		char *src;
200 
201 		page = shmem_read_mapping_page(mapping, i);
202 		if (IS_ERR(page)) {
203 			st = ERR_CAST(page);
204 			goto err_phys;
205 		}
206 
207 		src = kmap_atomic(page);
208 		memcpy(vaddr, src, PAGE_SIZE);
209 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
210 		kunmap_atomic(src);
211 
212 		put_page(page);
213 		vaddr += PAGE_SIZE;
214 	}
215 
216 	i915_gem_chipset_flush(to_i915(obj->base.dev));
217 
218 	st = kmalloc(sizeof(*st), GFP_KERNEL);
219 	if (!st) {
220 		st = ERR_PTR(-ENOMEM);
221 		goto err_phys;
222 	}
223 
224 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
225 		kfree(st);
226 		st = ERR_PTR(-ENOMEM);
227 		goto err_phys;
228 	}
229 
230 	sg = st->sgl;
231 	sg->offset = 0;
232 	sg->length = obj->base.size;
233 
234 	sg_dma_address(sg) = phys->busaddr;
235 	sg_dma_len(sg) = obj->base.size;
236 
237 	obj->phys_handle = phys;
238 	return st;
239 
240 err_phys:
241 	drm_pci_free(obj->base.dev, phys);
242 	return st;
243 }
244 
245 static void
246 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
247 				struct sg_table *pages)
248 {
249 	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
250 
251 	if (obj->mm.madv == I915_MADV_DONTNEED)
252 		obj->mm.dirty = false;
253 
254 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
255 	    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
256 		drm_clflush_sg(pages);
257 
258 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
259 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
260 }
261 
262 static void
263 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
264 			       struct sg_table *pages)
265 {
266 	__i915_gem_object_release_shmem(obj, pages);
267 
268 	if (obj->mm.dirty) {
269 		struct address_space *mapping = obj->base.filp->f_mapping;
270 		char *vaddr = obj->phys_handle->vaddr;
271 		int i;
272 
273 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
274 			struct page *page;
275 			char *dst;
276 
277 			page = shmem_read_mapping_page(mapping, i);
278 			if (IS_ERR(page))
279 				continue;
280 
281 			dst = kmap_atomic(page);
282 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
283 			memcpy(dst, vaddr, PAGE_SIZE);
284 			kunmap_atomic(dst);
285 
286 			set_page_dirty(page);
287 			if (obj->mm.madv == I915_MADV_WILLNEED)
288 				mark_page_accessed(page);
289 			put_page(page);
290 			vaddr += PAGE_SIZE;
291 		}
292 		obj->mm.dirty = false;
293 	}
294 
295 	sg_free_table(pages);
296 	kfree(pages);
297 
298 	drm_pci_free(obj->base.dev, obj->phys_handle);
299 }
300 
301 static void
302 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
303 {
304 	i915_gem_object_unpin_pages(obj);
305 }
306 
307 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
308 	.get_pages = i915_gem_object_get_pages_phys,
309 	.put_pages = i915_gem_object_put_pages_phys,
310 	.release = i915_gem_object_release_phys,
311 };
312 
313 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
314 {
315 	struct i915_vma *vma;
316 	LIST_HEAD(still_in_list);
317 	int ret;
318 
319 	lockdep_assert_held(&obj->base.dev->struct_mutex);
320 
321 	/* Closed vmas are removed from the obj->vma_list - but they may
322 	 * still have an active binding on the object. To remove those we
323 	 * must wait for all rendering to the object to complete (as unbinding
324 	 * must do anyway), and retire the requests.
325 	 */
326 	ret = i915_gem_object_wait(obj,
327 				   I915_WAIT_INTERRUPTIBLE |
328 				   I915_WAIT_LOCKED |
329 				   I915_WAIT_ALL,
330 				   MAX_SCHEDULE_TIMEOUT,
331 				   NULL);
332 	if (ret)
333 		return ret;
334 
335 	i915_gem_retire_requests(to_i915(obj->base.dev));
336 
337 	while ((vma = list_first_entry_or_null(&obj->vma_list,
338 					       struct i915_vma,
339 					       obj_link))) {
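		/*
		 * Unbinding does not take a still-open vma off obj->vma_list,
		 * so park each vma on a local list first to guarantee the
		 * loop makes forward progress; whatever is left is spliced
		 * back onto obj->vma_list below.
		 */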
340 		list_move_tail(&vma->obj_link, &still_in_list);
341 		ret = i915_vma_unbind(vma);
342 		if (ret)
343 			break;
344 	}
345 	list_splice(&still_in_list, &obj->vma_list);
346 
347 	return ret;
348 }
349 
350 static long
351 i915_gem_object_wait_fence(struct dma_fence *fence,
352 			   unsigned int flags,
353 			   long timeout,
354 			   struct intel_rps_client *rps)
355 {
356 	struct drm_i915_gem_request *rq;
357 
358 	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
359 
360 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
361 		return timeout;
362 
363 	if (!dma_fence_is_i915(fence))
364 		return dma_fence_wait_timeout(fence,
365 					      flags & I915_WAIT_INTERRUPTIBLE,
366 					      timeout);
367 
368 	rq = to_request(fence);
369 	if (i915_gem_request_completed(rq))
370 		goto out;
371 
372 	/* This client is about to stall waiting for the GPU. In many cases
373 	 * this is undesirable and limits the throughput of the system, as
374 	 * many clients cannot continue processing user input/output whilst
375 	 * blocked. RPS autotuning may take tens of milliseconds to respond
376 	 * to the GPU load and thus incurs additional latency for the client.
377 	 * We can circumvent that by promoting the GPU frequency to maximum
378 	 * before we wait. This makes the GPU throttle up much more quickly
379 	 * (good for benchmarks and user experience, e.g. window animations),
380 	 * but at a cost of spending more power processing the workload
381 	 * (bad for battery). Not all clients even want their results
382 	 * immediately and for them we should just let the GPU select its own
383 	 * frequency to maximise efficiency. To prevent a single client from
384 	 * forcing the clocks too high for the whole system, we only allow
385 	 * each client to waitboost once in a busy period.
386 	 */
387 	if (rps) {
388 		if (INTEL_GEN(rq->i915) >= 6)
389 			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
390 		else
391 			rps = NULL;
392 	}
393 
394 	timeout = i915_wait_request(rq, flags, timeout);
395 
396 out:
397 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
398 		i915_gem_request_retire_upto(rq);
399 
400 	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
401 		/* The GPU is now idle and this client has stalled.
402 		 * Since no other client has submitted a request in the
403 		 * meantime, assume that this client is the only one
404 		 * supplying work to the GPU but is unable to keep that
405 		 * work supplied because it is waiting. Since the GPU is
406 		 * then never kept fully busy, RPS autoclocking will
407 		 * keep the clocks relatively low, causing further delays.
408 		 * Compensate by giving the synchronous client credit for
409 		 * a waitboost next time.
410 		 */
411 		spin_lock(&rq->i915->rps.client_lock);
412 		list_del_init(&rps->link);
413 		spin_unlock(&rq->i915->rps.client_lock);
414 	}
415 
416 	return timeout;
417 }
418 
419 static long
420 i915_gem_object_wait_reservation(struct reservation_object *resv,
421 				 unsigned int flags,
422 				 long timeout,
423 				 struct intel_rps_client *rps)
424 {
425 	struct dma_fence *excl;
426 
427 	if (flags & I915_WAIT_ALL) {
428 		struct dma_fence **shared;
429 		unsigned int count, i;
430 		int ret;
431 
432 		ret = reservation_object_get_fences_rcu(resv,
433 							&excl, &count, &shared);
434 		if (ret)
435 			return ret;
436 
437 		for (i = 0; i < count; i++) {
438 			timeout = i915_gem_object_wait_fence(shared[i],
439 							     flags, timeout,
440 							     rps);
441 			if (timeout <= 0)
442 				break;
443 
444 			dma_fence_put(shared[i]);
445 		}
446 
447 		for (; i < count; i++)
448 			dma_fence_put(shared[i]);
449 		kfree(shared);
450 	} else {
451 		excl = reservation_object_get_excl_rcu(resv);
452 	}
453 
454 	if (excl && timeout > 0)
455 		timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
456 
457 	dma_fence_put(excl);
458 
459 	return timeout;
460 }
461 
462 static void __fence_set_priority(struct dma_fence *fence, int prio)
463 {
464 	struct drm_i915_gem_request *rq;
465 	struct intel_engine_cs *engine;
466 
467 	if (!dma_fence_is_i915(fence))
468 		return;
469 
470 	rq = to_request(fence);
471 	engine = rq->engine;
472 	if (!engine->schedule)
473 		return;
474 
475 	engine->schedule(rq, prio);
476 }
477 
478 static void fence_set_priority(struct dma_fence *fence, int prio)
479 {
480 	/* Recurse once into a fence-array */
481 	if (dma_fence_is_array(fence)) {
482 		struct dma_fence_array *array = to_dma_fence_array(fence);
483 		int i;
484 
485 		for (i = 0; i < array->num_fences; i++)
486 			__fence_set_priority(array->fences[i], prio);
487 	} else {
488 		__fence_set_priority(fence, prio);
489 	}
490 }
491 
492 int
493 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
494 			      unsigned int flags,
495 			      int prio)
496 {
497 	struct dma_fence *excl;
498 
499 	if (flags & I915_WAIT_ALL) {
500 		struct dma_fence **shared;
501 		unsigned int count, i;
502 		int ret;
503 
504 		ret = reservation_object_get_fences_rcu(obj->resv,
505 							&excl, &count, &shared);
506 		if (ret)
507 			return ret;
508 
509 		for (i = 0; i < count; i++) {
510 			fence_set_priority(shared[i], prio);
511 			dma_fence_put(shared[i]);
512 		}
513 
514 		kfree(shared);
515 	} else {
516 		excl = reservation_object_get_excl_rcu(obj->resv);
517 	}
518 
519 	if (excl) {
520 		fence_set_priority(excl, prio);
521 		dma_fence_put(excl);
522 	}
523 	return 0;
524 }
525 
526 /**
527  * Waits for rendering to the object to be completed
528  * @obj: i915 gem object
529  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
530  * @timeout: how long to wait
531  * @rps: client (user process) to charge for any waitboosting
532  */
533 int
534 i915_gem_object_wait(struct drm_i915_gem_object *obj,
535 		     unsigned int flags,
536 		     long timeout,
537 		     struct intel_rps_client *rps)
538 {
539 	might_sleep();
540 #if IS_ENABLED(CONFIG_LOCKDEP)
541 	GEM_BUG_ON(debug_locks &&
542 		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
543 		   !!(flags & I915_WAIT_LOCKED));
544 #endif
545 	GEM_BUG_ON(timeout < 0);
546 
547 	timeout = i915_gem_object_wait_reservation(obj->resv,
548 						   flags, timeout,
549 						   rps);
550 	return timeout < 0 ? timeout : 0;
551 }
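
/*
 * A minimal usage sketch (hypothetical caller, mirroring the pattern used by
 * callers throughout this file): wait for all outstanding work, including
 * reads, while holding struct_mutex, before touching the object's backing
 * storage:
 *
 *	ret = i915_gem_object_wait(obj,
 *				   I915_WAIT_INTERRUPTIBLE |
 *				   I915_WAIT_LOCKED |
 *				   I915_WAIT_ALL,
 *				   MAX_SCHEDULE_TIMEOUT,
 *				   NULL);
 *	if (ret)
 *		return ret;
 */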
552 
553 static struct intel_rps_client *to_rps_client(struct drm_file *file)
554 {
555 	struct drm_i915_file_private *fpriv = file->driver_priv;
556 
557 	return &fpriv->rps;
558 }
559 
560 int
561 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
562 			    int align)
563 {
564 	int ret;
565 
566 	if (align > obj->base.size)
567 		return -EINVAL;
568 
569 	if (obj->ops == &i915_gem_phys_ops)
570 		return 0;
571 
572 	if (obj->mm.madv != I915_MADV_WILLNEED)
573 		return -EFAULT;
574 
575 	if (obj->base.filp == NULL)
576 		return -EINVAL;
577 
578 	ret = i915_gem_object_unbind(obj);
579 	if (ret)
580 		return ret;
581 
582 	__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
583 	if (obj->mm.pages)
584 		return -EBUSY;
585 
586 	obj->ops = &i915_gem_phys_ops;
587 
588 	return i915_gem_object_pin_pages(obj);
589 }
590 
591 static int
592 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
593 		     struct drm_i915_gem_pwrite *args,
594 		     struct drm_file *file)
595 {
596 	struct drm_device *dev = obj->base.dev;
597 	void *vaddr = obj->phys_handle->vaddr + args->offset;
598 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
599 	int ret;
600 
601 	/* We manually control the domain here and pretend that it
602 	 * remains coherent, i.e. in the GTT domain, like shmem_pwrite.
603 	 */
604 	lockdep_assert_held(&obj->base.dev->struct_mutex);
605 	ret = i915_gem_object_wait(obj,
606 				   I915_WAIT_INTERRUPTIBLE |
607 				   I915_WAIT_LOCKED |
608 				   I915_WAIT_ALL,
609 				   MAX_SCHEDULE_TIMEOUT,
610 				   to_rps_client(file));
611 	if (ret)
612 		return ret;
613 
614 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
615 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
616 		unsigned long unwritten;
617 
618 		/* The physical object once assigned is fixed for the lifetime
619 		 * of the obj, so we can safely drop the lock and continue
620 		 * to access vaddr.
621 		 */
622 		mutex_unlock(&dev->struct_mutex);
623 		unwritten = copy_from_user(vaddr, user_data, args->size);
624 		mutex_lock(&dev->struct_mutex);
625 		if (unwritten) {
626 			ret = -EFAULT;
627 			goto out;
628 		}
629 	}
630 
631 	drm_clflush_virt_range(vaddr, args->size);
632 	i915_gem_chipset_flush(to_i915(dev));
633 
634 out:
635 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
636 	return ret;
637 }
638 
639 void *i915_gem_object_alloc(struct drm_device *dev)
640 {
641 	struct drm_i915_private *dev_priv = to_i915(dev);
642 	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
643 }
644 
645 void i915_gem_object_free(struct drm_i915_gem_object *obj)
646 {
647 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
648 	kmem_cache_free(dev_priv->objects, obj);
649 }
650 
651 static int
652 i915_gem_create(struct drm_file *file,
653 		struct drm_device *dev,
654 		uint64_t size,
655 		uint32_t *handle_p)
656 {
657 	struct drm_i915_gem_object *obj;
658 	int ret;
659 	u32 handle;
660 
661 	size = roundup(size, PAGE_SIZE);
662 	if (size == 0)
663 		return -EINVAL;
664 
665 	/* Allocate the new object */
666 	obj = i915_gem_object_create(dev, size);
667 	if (IS_ERR(obj))
668 		return PTR_ERR(obj);
669 
670 	ret = drm_gem_handle_create(file, &obj->base, &handle);
671 	/* drop reference from allocate - handle holds it now */
672 	i915_gem_object_put(obj);
673 	if (ret)
674 		return ret;
675 
676 	*handle_p = handle;
677 	return 0;
678 }
679 
680 int
681 i915_gem_dumb_create(struct drm_file *file,
682 		     struct drm_device *dev,
683 		     struct drm_mode_create_dumb *args)
684 {
685 	/* have to work out size/pitch and return them */
686 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
687 	args->size = args->pitch * args->height;
688 	return i915_gem_create(file, dev,
689 			       args->size, &args->handle);
690 }
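
/*
 * Worked example of the dumb-buffer calculation above: a 1920x1080 buffer at
 * 32 bpp gives pitch = ALIGN(1920 * 4, 64) = 7680 bytes (already 64-byte
 * aligned) and size = 7680 * 1080 = 8294400 bytes, which i915_gem_create()
 * then rounds up to a whole number of pages (a no-op in this case).
 */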
691 
692 /**
693  * Creates a new mm object and returns a handle to it.
694  * @dev: drm device pointer
695  * @data: ioctl data blob
696  * @file: drm file pointer
697  */
698 int
699 i915_gem_create_ioctl(struct drm_device *dev, void *data,
700 		      struct drm_file *file)
701 {
702 	struct drm_i915_gem_create *args = data;
703 
704 	i915_gem_flush_free_objects(to_i915(dev));
705 
706 	return i915_gem_create(file, dev,
707 			       args->size, &args->handle);
708 }
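
/*
 * A minimal userspace sketch of the create ioctl (assumes the uapi
 * definitions from include/uapi/drm/i915_drm.h and libdrm's drmIoctl();
 * fd is a placeholder for an open render node; illustrative only):
 *
 *	struct drm_i915_gem_create create = {
 *		.size = 4096,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		... create.handle now names the new object ...
 */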
709 
710 static inline int
711 __copy_to_user_swizzled(char __user *cpu_vaddr,
712 			const char *gpu_vaddr, int gpu_offset,
713 			int length)
714 {
715 	int ret, cpu_offset = 0;
716 
717 	while (length > 0) {
718 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
719 		int this_length = min(cacheline_end - gpu_offset, length);
720 		int swizzled_gpu_offset = gpu_offset ^ 64;
721 
722 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
723 				     gpu_vaddr + swizzled_gpu_offset,
724 				     this_length);
725 		if (ret)
726 			return ret + length;
727 
728 		cpu_offset += this_length;
729 		gpu_offset += this_length;
730 		length -= this_length;
731 	}
732 
733 	return 0;
734 }
735 
736 static inline int
737 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
738 			  const char __user *cpu_vaddr,
739 			  int length)
740 {
741 	int ret, cpu_offset = 0;
742 
743 	while (length > 0) {
744 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
745 		int this_length = min(cacheline_end - gpu_offset, length);
746 		int swizzled_gpu_offset = gpu_offset ^ 64;
747 
748 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
749 				       cpu_vaddr + cpu_offset,
750 				       this_length);
751 		if (ret)
752 			return ret + length;
753 
754 		cpu_offset += this_length;
755 		gpu_offset += this_length;
756 		length -= this_length;
757 	}
758 
759 	return 0;
760 }
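
/*
 * Worked example of the bit-17 swizzle handled by the two helpers above:
 * gpu_offset ^ 64 swaps the two 64-byte halves of every 128-byte block, so a
 * chunk that logically starts at gpu_offset 0x40 is copied from bytes
 * 0x00..0x3f of the kmap'd page, and one starting at 0x80 from 0xc0 onwards.
 * The cacheline_end clamp keeps each chunk inside a single 64-byte half so
 * the XOR never straddles a swap boundary.
 */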
761 
762 /*
763  * Pins the specified object's pages and synchronizes the object with
764  * GPU accesses. Sets needs_clflush to non-zero if the caller should
765  * flush the object from the CPU cache.
766  */
767 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
768 				    unsigned int *needs_clflush)
769 {
770 	int ret;
771 
772 	lockdep_assert_held(&obj->base.dev->struct_mutex);
773 
774 	*needs_clflush = 0;
775 	if (!i915_gem_object_has_struct_page(obj))
776 		return -ENODEV;
777 
778 	ret = i915_gem_object_wait(obj,
779 				   I915_WAIT_INTERRUPTIBLE |
780 				   I915_WAIT_LOCKED,
781 				   MAX_SCHEDULE_TIMEOUT,
782 				   NULL);
783 	if (ret)
784 		return ret;
785 
786 	ret = i915_gem_object_pin_pages(obj);
787 	if (ret)
788 		return ret;
789 
790 	i915_gem_object_flush_gtt_write_domain(obj);
791 
792 	/* If we're not in the cpu read domain, set ourself into the gtt
793 	 * read domain and manually flush cachelines (if required). This
794 	 * optimizes for the case when the gpu will dirty the data
795 	 * anyway again before the next pread happens.
796 	 */
797 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
798 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
799 							obj->cache_level);
800 
801 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
802 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
803 		if (ret)
804 			goto err_unpin;
805 
806 		*needs_clflush = 0;
807 	}
808 
809 	/* return with the pages pinned */
810 	return 0;
811 
812 err_unpin:
813 	i915_gem_object_unpin_pages(obj);
814 	return ret;
815 }
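
/*
 * A minimal sketch of the intended pairing (hypothetical caller, mirroring
 * the pread path below): prepare under struct_mutex, optionally drop the
 * lock for the copy since the pages stay pinned, then release them.
 *
 *	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 *	if (ret)
 *		return ret;
 *	... copy out of the shmem pages, clflushing first iff needs_clflush ...
 *	i915_gem_obj_finish_shmem_access(obj);
 */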
816 
817 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
818 				     unsigned int *needs_clflush)
819 {
820 	int ret;
821 
822 	lockdep_assert_held(&obj->base.dev->struct_mutex);
823 
824 	*needs_clflush = 0;
825 	if (!i915_gem_object_has_struct_page(obj))
826 		return -ENODEV;
827 
828 	ret = i915_gem_object_wait(obj,
829 				   I915_WAIT_INTERRUPTIBLE |
830 				   I915_WAIT_LOCKED |
831 				   I915_WAIT_ALL,
832 				   MAX_SCHEDULE_TIMEOUT,
833 				   NULL);
834 	if (ret)
835 		return ret;
836 
837 	ret = i915_gem_object_pin_pages(obj);
838 	if (ret)
839 		return ret;
840 
841 	i915_gem_object_flush_gtt_write_domain(obj);
842 
843 	/* If we're not in the cpu write domain, set ourself into the
844 	 * gtt write domain and manually flush cachelines (as required).
845 	 * This optimizes for the case when the gpu will use the data
846 	 * right away and we therefore have to clflush anyway.
847 	 */
848 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
849 		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
850 
851 	/* Same trick applies to invalidate partially written cachelines read
852 	 * before writing.
853 	 */
854 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
855 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
856 							 obj->cache_level);
857 
858 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
859 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
860 		if (ret)
861 			goto err_unpin;
862 
863 		*needs_clflush = 0;
864 	}
865 
866 	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
867 		obj->cache_dirty = true;
868 
869 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
870 	obj->mm.dirty = true;
871 	/* return with the pages pinned */
872 	return 0;
873 
874 err_unpin:
875 	i915_gem_object_unpin_pages(obj);
876 	return ret;
877 }
878 
879 static void
880 shmem_clflush_swizzled_range(char *addr, unsigned long length,
881 			     bool swizzled)
882 {
883 	if (unlikely(swizzled)) {
884 		unsigned long start = (unsigned long) addr;
885 		unsigned long end = (unsigned long) addr + length;
886 
887 		/* For swizzling simply ensure that we always flush both
888 		 * channels. Lame, but simple and it works. Swizzled
889 		 * pwrite/pread is far from a hotpath - current userspace
890 		 * doesn't use it at all. */
891 		start = round_down(start, 128);
892 		end = round_up(end, 128);
893 
894 		drm_clflush_virt_range((void *)start, end - start);
895 	} else {
896 		drm_clflush_virt_range(addr, length);
897 	}
898 
899 }
900 
901 /* The only difference from the fast-path function is that this one can
902  * handle bit17 swizzling and uses non-atomic copy and kmap functions. */
903 static int
904 shmem_pread_slow(struct page *page, int offset, int length,
905 		 char __user *user_data,
906 		 bool page_do_bit17_swizzling, bool needs_clflush)
907 {
908 	char *vaddr;
909 	int ret;
910 
911 	vaddr = kmap(page);
912 	if (needs_clflush)
913 		shmem_clflush_swizzled_range(vaddr + offset, length,
914 					     page_do_bit17_swizzling);
915 
916 	if (page_do_bit17_swizzling)
917 		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
918 	else
919 		ret = __copy_to_user(user_data, vaddr + offset, length);
920 	kunmap(page);
921 
922 	return ret ? - EFAULT : 0;
923 }
924 
925 static int
926 shmem_pread(struct page *page, int offset, int length, char __user *user_data,
927 	    bool page_do_bit17_swizzling, bool needs_clflush)
928 {
929 	int ret;
930 
931 	ret = -ENODEV;
932 	if (!page_do_bit17_swizzling) {
933 		char *vaddr = kmap_atomic(page);
934 
935 		if (needs_clflush)
936 			drm_clflush_virt_range(vaddr + offset, length);
937 		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
938 		kunmap_atomic(vaddr);
939 	}
940 	if (ret == 0)
941 		return 0;
942 
943 	return shmem_pread_slow(page, offset, length, user_data,
944 				page_do_bit17_swizzling, needs_clflush);
945 }
946 
947 static int
948 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
949 		     struct drm_i915_gem_pread *args)
950 {
951 	char __user *user_data;
952 	u64 remain;
953 	unsigned int obj_do_bit17_swizzling;
954 	unsigned int needs_clflush;
955 	unsigned int idx, offset;
956 	int ret;
957 
958 	obj_do_bit17_swizzling = 0;
959 	if (i915_gem_object_needs_bit17_swizzle(obj))
960 		obj_do_bit17_swizzling = BIT(17);
961 
962 	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
963 	if (ret)
964 		return ret;
965 
966 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
967 	mutex_unlock(&obj->base.dev->struct_mutex);
968 	if (ret)
969 		return ret;
970 
971 	remain = args->size;
972 	user_data = u64_to_user_ptr(args->data_ptr);
973 	offset = offset_in_page(args->offset);
974 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
975 		struct page *page = i915_gem_object_get_page(obj, idx);
976 		int length;
977 
978 		length = remain;
979 		if (offset + length > PAGE_SIZE)
980 			length = PAGE_SIZE - offset;
981 
982 		ret = shmem_pread(page, offset, length, user_data,
983 				  page_to_phys(page) & obj_do_bit17_swizzling,
984 				  needs_clflush);
985 		if (ret)
986 			break;
987 
988 		remain -= length;
989 		user_data += length;
990 		offset = 0;
991 	}
992 
993 	i915_gem_obj_finish_shmem_access(obj);
994 	return ret;
995 }
996 
997 static inline bool
998 gtt_user_read(struct io_mapping *mapping,
999 	      loff_t base, int offset,
1000 	      char __user *user_data, int length)
1001 {
1002 	void *vaddr;
1003 	unsigned long unwritten;
1004 
1005 	/* We can use the cpu mem copy function because this is X86. */
1006 	vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
1007 	unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
1008 	io_mapping_unmap_atomic(vaddr);
1009 	if (unwritten) {
1010 		vaddr = (void __force *)
1011 			io_mapping_map_wc(mapping, base, PAGE_SIZE);
1012 		unwritten = copy_to_user(user_data, vaddr + offset, length);
1013 		io_mapping_unmap(vaddr);
1014 	}
1015 	return unwritten;
1016 }
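
/*
 * Note on the two-step copy above: the atomic WC mapping cannot tolerate a
 * fault on the user buffer, so if __copy_to_user_inatomic() fails we fall
 * back to a regular (sleepable) WC mapping of the same page and a plain
 * copy_to_user(). ggtt_write() below uses the same pattern for writes.
 */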
1017 
1018 static int
1019 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
1020 		   const struct drm_i915_gem_pread *args)
1021 {
1022 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1023 	struct i915_ggtt *ggtt = &i915->ggtt;
1024 	struct drm_mm_node node;
1025 	struct i915_vma *vma;
1026 	void __user *user_data;
1027 	u64 remain, offset;
1028 	int ret;
1029 
1030 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1031 	if (ret)
1032 		return ret;
1033 
1034 	intel_runtime_pm_get(i915);
1035 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1036 				       PIN_MAPPABLE | PIN_NONBLOCK);
1037 	if (!IS_ERR(vma)) {
1038 		node.start = i915_ggtt_offset(vma);
1039 		node.allocated = false;
1040 		ret = i915_vma_put_fence(vma);
1041 		if (ret) {
1042 			i915_vma_unpin(vma);
1043 			vma = ERR_PTR(ret);
1044 		}
1045 	}
1046 	if (IS_ERR(vma)) {
1047 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1048 		if (ret)
1049 			goto out_unlock;
1050 		GEM_BUG_ON(!node.allocated);
1051 	}
1052 
1053 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
1054 	if (ret)
1055 		goto out_unpin;
1056 
1057 	mutex_unlock(&i915->drm.struct_mutex);
1058 
1059 	user_data = u64_to_user_ptr(args->data_ptr);
1060 	remain = args->size;
1061 	offset = args->offset;
1062 
1063 	while (remain > 0) {
1064 		/* Operation in this page
1065 		 *
1066 		 * page_base = page offset within aperture
1067 		 * page_offset = offset within page
1068 		 * page_length = bytes to copy for this page
1069 		 */
1070 		u32 page_base = node.start;
1071 		unsigned page_offset = offset_in_page(offset);
1072 		unsigned page_length = PAGE_SIZE - page_offset;
1073 		page_length = remain < page_length ? remain : page_length;
1074 		if (node.allocated) {
1075 			wmb();
1076 			ggtt->base.insert_page(&ggtt->base,
1077 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1078 					       node.start, I915_CACHE_NONE, 0);
1079 			wmb();
1080 		} else {
1081 			page_base += offset & PAGE_MASK;
1082 		}
1083 
1084 		if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
1085 				  user_data, page_length)) {
1086 			ret = -EFAULT;
1087 			break;
1088 		}
1089 
1090 		remain -= page_length;
1091 		user_data += page_length;
1092 		offset += page_length;
1093 	}
1094 
1095 	mutex_lock(&i915->drm.struct_mutex);
1096 out_unpin:
1097 	if (node.allocated) {
1098 		wmb();
1099 		ggtt->base.clear_range(&ggtt->base,
1100 				       node.start, node.size);
1101 		remove_mappable_node(&node);
1102 	} else {
1103 		i915_vma_unpin(vma);
1104 	}
1105 out_unlock:
1106 	intel_runtime_pm_put(i915);
1107 	mutex_unlock(&i915->drm.struct_mutex);
1108 
1109 	return ret;
1110 }
1111 
1112 /**
1113  * Reads data from the object referenced by handle.
1114  * @dev: drm device pointer
1115  * @data: ioctl data blob
1116  * @file: drm file pointer
1117  *
1118  * On error, the contents of *data are undefined.
1119  */
1120 int
1121 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1122 		     struct drm_file *file)
1123 {
1124 	struct drm_i915_gem_pread *args = data;
1125 	struct drm_i915_gem_object *obj;
1126 	int ret;
1127 
1128 	if (args->size == 0)
1129 		return 0;
1130 
1131 	if (!access_ok(VERIFY_WRITE,
1132 		       u64_to_user_ptr(args->data_ptr),
1133 		       args->size))
1134 		return -EFAULT;
1135 
1136 	obj = i915_gem_object_lookup(file, args->handle);
1137 	if (!obj)
1138 		return -ENOENT;
1139 
1140 	/* Bounds check source.  */
1141 	if (args->offset > obj->base.size ||
1142 	    args->size > obj->base.size - args->offset) {
1143 		ret = -EINVAL;
1144 		goto out;
1145 	}
1146 
1147 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1148 
1149 	ret = i915_gem_object_wait(obj,
1150 				   I915_WAIT_INTERRUPTIBLE,
1151 				   MAX_SCHEDULE_TIMEOUT,
1152 				   to_rps_client(file));
1153 	if (ret)
1154 		goto out;
1155 
1156 	ret = i915_gem_object_pin_pages(obj);
1157 	if (ret)
1158 		goto out;
1159 
1160 	ret = i915_gem_shmem_pread(obj, args);
1161 	if (ret == -EFAULT || ret == -ENODEV)
1162 		ret = i915_gem_gtt_pread(obj, args);
1163 
1164 	i915_gem_object_unpin_pages(obj);
1165 out:
1166 	i915_gem_object_put(obj);
1167 	return ret;
1168 }
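
/*
 * A minimal userspace sketch of the pread ioctl (assumes the uapi structures
 * from include/uapi/drm/i915_drm.h and libdrm's drmIoctl(); fd and handle
 * are placeholders; illustrative only):
 *
 *	char buf[4096];
 *	struct drm_i915_gem_pread pread = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = sizeof(buf),
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */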
1169 
1170 /* This is the fast write path, which cannot handle
1171  * page faults in the source data.
1172  */
1173 
1174 static inline bool
1175 ggtt_write(struct io_mapping *mapping,
1176 	   loff_t base, int offset,
1177 	   char __user *user_data, int length)
1178 {
1179 	void *vaddr;
1180 	unsigned long unwritten;
1181 
1182 	/* We can use the cpu mem copy function because this is X86. */
1183 	vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
1184 	unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
1185 						      user_data, length);
1186 	io_mapping_unmap_atomic(vaddr);
1187 	if (unwritten) {
1188 		vaddr = (void __force *)
1189 			io_mapping_map_wc(mapping, base, PAGE_SIZE);
1190 		unwritten = copy_from_user(vaddr + offset, user_data, length);
1191 		io_mapping_unmap(vaddr);
1192 	}
1193 
1194 	return unwritten;
1195 }
1196 
1197 /**
1198  * This is the fast pwrite path, where we copy the data directly from the
1199  * user into the GTT, uncached.
1200  * @obj: i915 GEM object
1201  * @args: pwrite arguments structure
1202  */
1203 static int
1204 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1205 			 const struct drm_i915_gem_pwrite *args)
1206 {
1207 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1208 	struct i915_ggtt *ggtt = &i915->ggtt;
1209 	struct drm_mm_node node;
1210 	struct i915_vma *vma;
1211 	u64 remain, offset;
1212 	void __user *user_data;
1213 	int ret;
1214 
1215 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1216 	if (ret)
1217 		return ret;
1218 
1219 	intel_runtime_pm_get(i915);
1220 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1221 				       PIN_MAPPABLE | PIN_NONBLOCK);
1222 	if (!IS_ERR(vma)) {
1223 		node.start = i915_ggtt_offset(vma);
1224 		node.allocated = false;
1225 		ret = i915_vma_put_fence(vma);
1226 		if (ret) {
1227 			i915_vma_unpin(vma);
1228 			vma = ERR_PTR(ret);
1229 		}
1230 	}
1231 	if (IS_ERR(vma)) {
1232 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1233 		if (ret)
1234 			goto out_unlock;
1235 		GEM_BUG_ON(!node.allocated);
1236 	}
1237 
1238 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1239 	if (ret)
1240 		goto out_unpin;
1241 
1242 	mutex_unlock(&i915->drm.struct_mutex);
1243 
1244 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1245 
1246 	user_data = u64_to_user_ptr(args->data_ptr);
1247 	offset = args->offset;
1248 	remain = args->size;
1249 	while (remain) {
1250 		/* Operation in this page
1251 		 *
1252 		 * page_base = page offset within aperture
1253 		 * page_offset = offset within page
1254 		 * page_length = bytes to copy for this page
1255 		 */
1256 		u32 page_base = node.start;
1257 		unsigned int page_offset = offset_in_page(offset);
1258 		unsigned int page_length = PAGE_SIZE - page_offset;
1259 		page_length = remain < page_length ? remain : page_length;
1260 		if (node.allocated) {
1261 			wmb(); /* flush the write before we modify the GGTT */
1262 			ggtt->base.insert_page(&ggtt->base,
1263 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1264 					       node.start, I915_CACHE_NONE, 0);
1265 			wmb(); /* flush modifications to the GGTT (insert_page) */
1266 		} else {
1267 			page_base += offset & PAGE_MASK;
1268 		}
1269 		/* If we get a fault while copying data, then (presumably) our
1270 		 * source page isn't available.  Return the error and we'll
1271 		 * retry in the slow path.
1272 		 * If the object is non-shmem backed, we retry again with the
1273 		 * path that handles page fault.
1274 		 * path that handles page faults.
1275 		if (ggtt_write(&ggtt->mappable, page_base, page_offset,
1276 			       user_data, page_length)) {
1277 			ret = -EFAULT;
1278 			break;
1279 		}
1280 
1281 		remain -= page_length;
1282 		user_data += page_length;
1283 		offset += page_length;
1284 	}
1285 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1286 
1287 	mutex_lock(&i915->drm.struct_mutex);
1288 out_unpin:
1289 	if (node.allocated) {
1290 		wmb();
1291 		ggtt->base.clear_range(&ggtt->base,
1292 				       node.start, node.size);
1293 		remove_mappable_node(&node);
1294 	} else {
1295 		i915_vma_unpin(vma);
1296 	}
1297 out_unlock:
1298 	intel_runtime_pm_put(i915);
1299 	mutex_unlock(&i915->drm.struct_mutex);
1300 	return ret;
1301 }
1302 
1303 static int
1304 shmem_pwrite_slow(struct page *page, int offset, int length,
1305 		  char __user *user_data,
1306 		  bool page_do_bit17_swizzling,
1307 		  bool needs_clflush_before,
1308 		  bool needs_clflush_after)
1309 {
1310 	char *vaddr;
1311 	int ret;
1312 
1313 	vaddr = kmap(page);
1314 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1315 		shmem_clflush_swizzled_range(vaddr + offset, length,
1316 					     page_do_bit17_swizzling);
1317 	if (page_do_bit17_swizzling)
1318 		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1319 						length);
1320 	else
1321 		ret = __copy_from_user(vaddr + offset, user_data, length);
1322 	if (needs_clflush_after)
1323 		shmem_clflush_swizzled_range(vaddr + offset, length,
1324 					     page_do_bit17_swizzling);
1325 	kunmap(page);
1326 
1327 	return ret ? -EFAULT : 0;
1328 }
1329 
1330 /* Per-page copy function for the shmem pwrite fastpath.
1331  * Flushes invalid cachelines before writing to the target if
1332  * needs_clflush_before is set and flushes out any written cachelines after
1333  * writing if needs_clflush_after is set.
1334  */
1335 static int
1336 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1337 	     bool page_do_bit17_swizzling,
1338 	     bool needs_clflush_before,
1339 	     bool needs_clflush_after)
1340 {
1341 	int ret;
1342 
1343 	ret = -ENODEV;
1344 	if (!page_do_bit17_swizzling) {
1345 		char *vaddr = kmap_atomic(page);
1346 
1347 		if (needs_clflush_before)
1348 			drm_clflush_virt_range(vaddr + offset, len);
1349 		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1350 		if (needs_clflush_after)
1351 			drm_clflush_virt_range(vaddr + offset, len);
1352 
1353 		kunmap_atomic(vaddr);
1354 	}
1355 	if (ret == 0)
1356 		return ret;
1357 
1358 	return shmem_pwrite_slow(page, offset, len, user_data,
1359 				 page_do_bit17_swizzling,
1360 				 needs_clflush_before,
1361 				 needs_clflush_after);
1362 }
1363 
1364 static int
1365 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1366 		      const struct drm_i915_gem_pwrite *args)
1367 {
1368 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1369 	void __user *user_data;
1370 	u64 remain;
1371 	unsigned int obj_do_bit17_swizzling;
1372 	unsigned int partial_cacheline_write;
1373 	unsigned int needs_clflush;
1374 	unsigned int offset, idx;
1375 	int ret;
1376 
1377 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1378 	if (ret)
1379 		return ret;
1380 
1381 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1382 	mutex_unlock(&i915->drm.struct_mutex);
1383 	if (ret)
1384 		return ret;
1385 
1386 	obj_do_bit17_swizzling = 0;
1387 	if (i915_gem_object_needs_bit17_swizzle(obj))
1388 		obj_do_bit17_swizzling = BIT(17);
1389 
1390 	/* If we don't overwrite a cacheline completely we need to be
1391 	 * careful to have up-to-date data by first clflushing. Don't
1392 	 * overcomplicate things and flush the entire written range.
1393 	 */
1394 	partial_cacheline_write = 0;
1395 	if (needs_clflush & CLFLUSH_BEFORE)
1396 		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
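	/*
	 * Example, assuming the usual 64-byte clflush size: the mask is 63,
	 * so (offset | length) & 63 below is non-zero whenever a per-page
	 * copy does not both start and end on a cacheline boundary; such
	 * chunks are clflushed before the copy so the partially covered
	 * cachelines hold up-to-date data.
	 */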
1397 
1398 	user_data = u64_to_user_ptr(args->data_ptr);
1399 	remain = args->size;
1400 	offset = offset_in_page(args->offset);
1401 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1402 		struct page *page = i915_gem_object_get_page(obj, idx);
1403 		int length;
1404 
1405 		length = remain;
1406 		if (offset + length > PAGE_SIZE)
1407 			length = PAGE_SIZE - offset;
1408 
1409 		ret = shmem_pwrite(page, offset, length, user_data,
1410 				   page_to_phys(page) & obj_do_bit17_swizzling,
1411 				   (offset | length) & partial_cacheline_write,
1412 				   needs_clflush & CLFLUSH_AFTER);
1413 		if (ret)
1414 			break;
1415 
1416 		remain -= length;
1417 		user_data += length;
1418 		offset = 0;
1419 	}
1420 
1421 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1422 	i915_gem_obj_finish_shmem_access(obj);
1423 	return ret;
1424 }
1425 
1426 /**
1427  * Writes data to the object referenced by handle.
1428  * @dev: drm device
1429  * @data: ioctl data blob
1430  * @file: drm file
1431  *
1432  * On error, the contents of the buffer that were to be modified are undefined.
1433  */
1434 int
1435 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1436 		      struct drm_file *file)
1437 {
1438 	struct drm_i915_gem_pwrite *args = data;
1439 	struct drm_i915_gem_object *obj;
1440 	int ret;
1441 
1442 	if (args->size == 0)
1443 		return 0;
1444 
1445 	if (!access_ok(VERIFY_READ,
1446 		       u64_to_user_ptr(args->data_ptr),
1447 		       args->size))
1448 		return -EFAULT;
1449 
1450 	obj = i915_gem_object_lookup(file, args->handle);
1451 	if (!obj)
1452 		return -ENOENT;
1453 
1454 	/* Bounds check destination. */
1455 	if (args->offset > obj->base.size ||
1456 	    args->size > obj->base.size - args->offset) {
1457 		ret = -EINVAL;
1458 		goto err;
1459 	}
1460 
1461 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1462 
1463 	ret = i915_gem_object_wait(obj,
1464 				   I915_WAIT_INTERRUPTIBLE |
1465 				   I915_WAIT_ALL,
1466 				   MAX_SCHEDULE_TIMEOUT,
1467 				   to_rps_client(file));
1468 	if (ret)
1469 		goto err;
1470 
1471 	ret = i915_gem_object_pin_pages(obj);
1472 	if (ret)
1473 		goto err;
1474 
1475 	ret = -EFAULT;
1476 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1477 	 * it would end up going through the fenced access, and we'll get
1478 	 * different detiling behavior between reading and writing.
1479 	 * pread/pwrite currently are reading and writing from the CPU
1480 	 * perspective, requiring manual detiling by the client.
1481 	 */
1482 	if (!i915_gem_object_has_struct_page(obj) ||
1483 	    cpu_write_needs_clflush(obj))
1484 		/* Note that the gtt paths might fail with non-page-backed user
1485 		 * pointers (e.g. gtt mappings when moving data between
1486 		 * textures). Fallback to the shmem path in that case.
1487 		 */
1488 		ret = i915_gem_gtt_pwrite_fast(obj, args);
1489 
1490 	if (ret == -EFAULT || ret == -ENOSPC) {
1491 		if (obj->phys_handle)
1492 			ret = i915_gem_phys_pwrite(obj, args, file);
1493 		else
1494 			ret = i915_gem_shmem_pwrite(obj, args);
1495 	}
1496 
1497 	i915_gem_object_unpin_pages(obj);
1498 err:
1499 	i915_gem_object_put(obj);
1500 	return ret;
1501 }
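
/*
 * A minimal userspace sketch of the pwrite ioctl (assumes the uapi
 * structures from include/uapi/drm/i915_drm.h and libdrm's drmIoctl();
 * fd, handle, data and len are placeholders; illustrative only):
 *
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = len,
 *		.data_ptr = (uintptr_t)data,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
 */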
1502 
1503 static inline enum fb_op_origin
1504 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1505 {
1506 	return (domain == I915_GEM_DOMAIN_GTT ?
1507 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1508 }
1509 
1510 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1511 {
1512 	struct drm_i915_private *i915;
1513 	struct list_head *list;
1514 	struct i915_vma *vma;
1515 
1516 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
1517 		if (!i915_vma_is_ggtt(vma))
1518 			continue;
1519 
1520 		if (i915_vma_is_active(vma))
1521 			continue;
1522 
1523 		if (!drm_mm_node_allocated(&vma->node))
1524 			continue;
1525 
1526 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1527 	}
1528 
1529 	i915 = to_i915(obj->base.dev);
1530 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1531 	list_move_tail(&obj->global_link, list);
1532 }
1533 
1534 /**
1535  * Called when user space prepares to use an object with the CPU, either
1536  * through the mmap ioctl's mapping or a GTT mapping.
1537  * @dev: drm device
1538  * @data: ioctl data blob
1539  * @file: drm file
1540  */
1541 int
1542 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1543 			  struct drm_file *file)
1544 {
1545 	struct drm_i915_gem_set_domain *args = data;
1546 	struct drm_i915_gem_object *obj;
1547 	uint32_t read_domains = args->read_domains;
1548 	uint32_t write_domain = args->write_domain;
1549 	int err;
1550 
1551 	/* Only handle setting domains to types used by the CPU. */
1552 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1553 		return -EINVAL;
1554 
1555 	/* Having something in the write domain implies it's in the read
1556 	 * domain, and only that read domain.  Enforce that in the request.
1557 	 */
1558 	if (write_domain != 0 && read_domains != write_domain)
1559 		return -EINVAL;
1560 
1561 	obj = i915_gem_object_lookup(file, args->handle);
1562 	if (!obj)
1563 		return -ENOENT;
1564 
1565 	/* Try to flush the object off the GPU without holding the lock.
1566 	 * We will repeat the flush holding the lock in the normal manner
1567 	 * to catch cases where we are gazumped.
1568 	 */
1569 	err = i915_gem_object_wait(obj,
1570 				   I915_WAIT_INTERRUPTIBLE |
1571 				   (write_domain ? I915_WAIT_ALL : 0),
1572 				   MAX_SCHEDULE_TIMEOUT,
1573 				   to_rps_client(file));
1574 	if (err)
1575 		goto out;
1576 
1577 	/* Flush and acquire obj->pages so that we are coherent through
1578 	 * direct access in memory with previous cached writes through
1579 	 * shmemfs and that our cache domain tracking remains valid.
1580 	 * For example, if the obj->filp was moved to swap without us
1581 	 * being notified and releasing the pages, we would mistakenly
1582 	 * continue to assume that the obj remained out of the CPU cached
1583 	 * domain.
1584 	 */
1585 	err = i915_gem_object_pin_pages(obj);
1586 	if (err)
1587 		goto out;
1588 
1589 	err = i915_mutex_lock_interruptible(dev);
1590 	if (err)
1591 		goto out_unpin;
1592 
1593 	if (read_domains & I915_GEM_DOMAIN_GTT)
1594 		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1595 	else
1596 		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1597 
1598 	/* And bump the LRU for this access */
1599 	i915_gem_object_bump_inactive_ggtt(obj);
1600 
1601 	mutex_unlock(&dev->struct_mutex);
1602 
1603 	if (write_domain != 0)
1604 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1605 
1606 out_unpin:
1607 	i915_gem_object_unpin_pages(obj);
1608 out:
1609 	i915_gem_object_put(obj);
1610 	return err;
1611 }
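
/*
 * A minimal userspace sketch of the set-domain ioctl (assumes the uapi
 * definitions from include/uapi/drm/i915_drm.h and libdrm's drmIoctl();
 * fd and handle are placeholders; illustrative only): move the object into
 * the CPU domain for reading and writing before touching a cpu mmap of it.
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 */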
1612 
1613 /**
1614  * Called when user space has done writes to this buffer
1615  * @dev: drm device
1616  * @data: ioctl data blob
1617  * @file: drm file
1618  */
1619 int
1620 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1621 			 struct drm_file *file)
1622 {
1623 	struct drm_i915_gem_sw_finish *args = data;
1624 	struct drm_i915_gem_object *obj;
1625 	int err = 0;
1626 
1627 	obj = i915_gem_object_lookup(file, args->handle);
1628 	if (!obj)
1629 		return -ENOENT;
1630 
1631 	/* Pinned buffers may be scanout, so flush the cache */
1632 	if (READ_ONCE(obj->pin_display)) {
1633 		err = i915_mutex_lock_interruptible(dev);
1634 		if (!err) {
1635 			i915_gem_object_flush_cpu_write_domain(obj);
1636 			mutex_unlock(&dev->struct_mutex);
1637 		}
1638 	}
1639 
1640 	i915_gem_object_put(obj);
1641 	return err;
1642 }
1643 
1644 /**
1645  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1646  *			 it is mapped to.
1647  * @dev: drm device
1648  * @data: ioctl data blob
1649  * @file: drm file
1650  *
1651  * While the mapping holds a reference on the contents of the object, it doesn't
1652  * imply a ref on the object itself.
1653  *
1654  * IMPORTANT:
1655  *
1656  * DRM driver writers who look at this function as an example for how to do GEM
1657  * mmap support, please don't implement mmap support like here. The modern way
1658  * to implement DRM mmap support is with an mmap offset ioctl (like
1659  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1660  * That way debug tooling like valgrind will understand what's going on, hiding
1661  * the mmap call in a driver private ioctl will break that. The i915 driver only
1662  * does cpu mmaps this way because we didn't know better.
1663  */
1664 int
1665 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1666 		    struct drm_file *file)
1667 {
1668 	struct drm_i915_gem_mmap *args = data;
1669 	struct drm_i915_gem_object *obj;
1670 	unsigned long addr;
1671 
1672 	if (args->flags & ~(I915_MMAP_WC))
1673 		return -EINVAL;
1674 
1675 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1676 		return -ENODEV;
1677 
1678 	obj = i915_gem_object_lookup(file, args->handle);
1679 	if (!obj)
1680 		return -ENOENT;
1681 
1682 	/* prime objects have no backing filp to GEM mmap
1683 	 * pages from.
1684 	 */
1685 	if (!obj->base.filp) {
1686 		i915_gem_object_put(obj);
1687 		return -EINVAL;
1688 	}
1689 
1690 	addr = vm_mmap(obj->base.filp, 0, args->size,
1691 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1692 		       args->offset);
1693 	if (args->flags & I915_MMAP_WC) {
1694 		struct mm_struct *mm = current->mm;
1695 		struct vm_area_struct *vma;
1696 
1697 		if (down_write_killable(&mm->mmap_sem)) {
1698 			i915_gem_object_put(obj);
1699 			return -EINTR;
1700 		}
1701 		vma = find_vma(mm, addr);
1702 		if (vma)
1703 			vma->vm_page_prot =
1704 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1705 		else
1706 			addr = -ENOMEM;
1707 		up_write(&mm->mmap_sem);
1708 
1709 		/* This may race, but that's ok, it only gets set */
1710 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1711 	}
1712 	i915_gem_object_put(obj);
1713 	if (IS_ERR((void *)addr))
1714 		return addr;
1715 
1716 	args->addr_ptr = (uint64_t) addr;
1717 
1718 	return 0;
1719 }
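
/*
 * A minimal userspace sketch of the cpu-mmap ioctl above (assumes the uapi
 * structures from include/uapi/drm/i915_drm.h and libdrm's drmIoctl();
 * fd, handle, obj_size and ptr are placeholders; illustrative only):
 *
 *	struct drm_i915_gem_mmap mmap_arg = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = obj_size,
 *		.flags = I915_MMAP_WC,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) == 0)
 *		ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;
 */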
1720 
1721 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1722 {
1723 	u64 size;
1724 
1725 	size = i915_gem_object_get_stride(obj);
1726 	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1727 
1728 	return size >> PAGE_SHIFT;
1729 }
1730 
1731 /**
1732  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1733  *
1734  * A history of the GTT mmap interface:
1735  *
1736  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1737  *     be aligned and suitable for fencing, and still fit into the available
1738  *     mappable space left by the pinned display objects. A classic problem
1739  *     we called the page-fault-of-doom where we would ping-pong between
1740  *     two objects that could not fit inside the GTT and so the memcpy
1741  *     would page one object in at the expense of the other between every
1742  *     single byte.
1743  *
1744  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1745  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1746  *     object is too large for the available space (or simply too large
1747  *     for the mappable aperture!), a view is created instead and faulted
1748  *     into userspace. (This view is aligned and sized appropriately for
1749  *     fenced access.)
1750  *
1751  * Restrictions:
1752  *
1753  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1754  *    hangs on some architectures, corruption on others. An attempt to service
1755  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1756  *
1757  *  * the object must be able to fit into RAM (physical memory, though not
1758  *    limited to the mappable aperture).
1759  *
1760  *
1761  * Caveats:
1762  *
1763  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1764  *    all data to system memory. Subsequent access will not be synchronized.
1765  *
1766  *  * all mappings are revoked on runtime device suspend.
1767  *
1768  *  * there are only 8, 16 or 32 fence registers to share between all users
1769  *    (older machines require fence register for display and blitter access
1770  *    (older machines require fence registers for display and blitter access
1771  *    to be unmapped and any new access will generate new page faults.
1772  *
1773  *  * running out of memory while servicing a fault may generate a SIGBUS,
1774  *    rather than the expected SIGSEGV.
1775  */
1776 int i915_gem_mmap_gtt_version(void)
1777 {
1778 	return 1;
1779 }
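
/*
 * A minimal userspace sketch of querying this version (assumes the uapi
 * definitions from include/uapi/drm/i915_drm.h and libdrm's drmIoctl();
 * fd is a placeholder; illustrative only):
 *
 *	int gtt_version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &gtt_version,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && gtt_version >= 1)
 *		... any size/tiling of object may be faulted through the GTT ...
 */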
1780 
1781 /**
1782  * i915_gem_fault - fault a page into the GTT
1783  * @area: CPU VMA in question
1784  * @vmf: fault info
1785  *
1786  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1787  * from userspace.  The fault handler takes care of binding the object to
1788  * the GTT (if needed), allocating and programming a fence register (again,
1789  * only if needed based on whether the old reg is still valid or the object
1790  * is tiled) and inserting a new PTE into the faulting process.
1791  *
1792  * Note that the faulting process may involve evicting existing objects
1793  * from the GTT and/or fence registers to make room.  So performance may
1794  * suffer if the GTT working set is large or there are few fence registers
1795  * left.
1796  *
1797  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1798  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1799  */
1800 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1801 {
1802 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1803 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1804 	struct drm_device *dev = obj->base.dev;
1805 	struct drm_i915_private *dev_priv = to_i915(dev);
1806 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1807 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1808 	struct i915_vma *vma;
1809 	pgoff_t page_offset;
1810 	unsigned int flags;
1811 	int ret;
1812 
1813 	/* We don't use vmf->pgoff since that has the fake offset */
1814 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1815 
1816 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1817 
1818 	/* Try to flush the object off the GPU first without holding the lock.
1819 	 * Upon acquiring the lock, we will perform our sanity checks and then
1820 	 * repeat the flush holding the lock in the normal manner to catch cases
1821 	 * where we are gazumped.
1822 	 */
1823 	ret = i915_gem_object_wait(obj,
1824 				   I915_WAIT_INTERRUPTIBLE,
1825 				   MAX_SCHEDULE_TIMEOUT,
1826 				   NULL);
1827 	if (ret)
1828 		goto err;
1829 
1830 	ret = i915_gem_object_pin_pages(obj);
1831 	if (ret)
1832 		goto err;
1833 
1834 	intel_runtime_pm_get(dev_priv);
1835 
1836 	ret = i915_mutex_lock_interruptible(dev);
1837 	if (ret)
1838 		goto err_rpm;
1839 
1840 	/* Access to snoopable pages through the GTT is incoherent. */
1841 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1842 		ret = -EFAULT;
1843 		goto err_unlock;
1844 	}
1845 
1846 	/* If the object is smaller than a couple of partial-vma chunks, it is
1847 	 * not worth creating just a single partial vma - we may as well
1848 	 * clear enough space for the full object.
1849 	 */
1850 	flags = PIN_MAPPABLE;
1851 	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1852 		flags |= PIN_NONBLOCK | PIN_NONFAULT;
1853 
1854 	/* Now pin it into the GTT as needed */
1855 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1856 	if (IS_ERR(vma)) {
1857 		struct i915_ggtt_view view;
1858 		unsigned int chunk_size;
1859 
1860 		/* Use a partial view if it is bigger than available space */
1861 		chunk_size = MIN_CHUNK_PAGES;
1862 		if (i915_gem_object_is_tiled(obj))
1863 			chunk_size = roundup(chunk_size, tile_row_pages(obj));
1864 
1865 		memset(&view, 0, sizeof(view));
1866 		view.type = I915_GGTT_VIEW_PARTIAL;
1867 		view.params.partial.offset = rounddown(page_offset, chunk_size);
1868 		view.params.partial.size =
1869 			min_t(unsigned int, chunk_size,
1870 			      vma_pages(area) - view.params.partial.offset);
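		/* Illustrative example (assuming 4 KiB pages, so
		 * MIN_CHUNK_PAGES == 256): an untiled object faulting at
		 * page_offset 1000 gets a partial view covering pages
		 * 768-1023, i.e. the 1 MiB aligned chunk around the fault.
		 */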
1871 
1872 		/* If the partial covers the entire object, just create a
1873 		 * normal VMA.
1874 		 */
1875 		if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1876 			view.type = I915_GGTT_VIEW_NORMAL;
1877 
1878 		/* Userspace is now writing through an untracked VMA, abandon
1879 		 * all hope that the hardware is able to track future writes.
1880 		 */
1881 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1882 
1883 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1884 	}
1885 	if (IS_ERR(vma)) {
1886 		ret = PTR_ERR(vma);
1887 		goto err_unlock;
1888 	}
1889 
1890 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1891 	if (ret)
1892 		goto err_unpin;
1893 
1894 	ret = i915_vma_get_fence(vma);
1895 	if (ret)
1896 		goto err_unpin;
1897 
1898 	/* Mark as being mmapped into userspace for later revocation */
1899 	assert_rpm_wakelock_held(dev_priv);
1900 	if (list_empty(&obj->userfault_link))
1901 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1902 
1903 	/* Finally, remap it using the new GTT offset */
1904 	ret = remap_io_mapping(area,
1905 			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1906 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1907 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
1908 			       &ggtt->mappable);
1909 
1910 err_unpin:
1911 	__i915_vma_unpin(vma);
1912 err_unlock:
1913 	mutex_unlock(&dev->struct_mutex);
1914 err_rpm:
1915 	intel_runtime_pm_put(dev_priv);
1916 	i915_gem_object_unpin_pages(obj);
1917 err:
1918 	switch (ret) {
1919 	case -EIO:
1920 		/*
1921 		 * We eat errors when the gpu is terminally wedged to avoid
1922 		 * userspace unduly crashing (gl has no provisions for mmaps to
1923 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1924 		 * and so needs to be reported.
1925 		 */
1926 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1927 			ret = VM_FAULT_SIGBUS;
1928 			break;
1929 		}
1930 	case -EAGAIN:
1931 		/*
1932 		 * EAGAIN means the gpu is hung and we'll wait for the error
1933 		 * handler to reset everything when re-faulting in
1934 		 * i915_mutex_lock_interruptible.
1935 		 */
1936 	case 0:
1937 	case -ERESTARTSYS:
1938 	case -EINTR:
1939 	case -EBUSY:
1940 		/*
1941 		 * EBUSY is ok: this just means that another thread
1942 		 * already did the job.
1943 		 */
1944 		ret = VM_FAULT_NOPAGE;
1945 		break;
1946 	case -ENOMEM:
1947 		ret = VM_FAULT_OOM;
1948 		break;
1949 	case -ENOSPC:
1950 	case -EFAULT:
1951 		ret = VM_FAULT_SIGBUS;
1952 		break;
1953 	default:
1954 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1955 		ret = VM_FAULT_SIGBUS;
1956 		break;
1957 	}
1958 	return ret;
1959 }
1960 
1961 /**
1962  * i915_gem_release_mmap - remove physical page mappings
1963  * @obj: obj in question
1964  *
1965  * Preserve the reservation of the mmapping with the DRM core code, but
1966  * relinquish ownership of the pages back to the system.
1967  *
1968  * It is vital that we remove the page mapping if we have mapped a tiled
1969  * object through the GTT and then lose the fence register due to
1970  * resource pressure. Similarly if the object has been moved out of the
1971  * aperture, then pages mapped into userspace must be revoked. Removing the
1972  * mapping will then trigger a page fault on the next user access, allowing
1973  * fixup by i915_gem_fault().
1974  */
1975 void
1976 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1977 {
1978 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1979 
1980 	/* Serialisation between user GTT access and our code depends upon
1981 	 * revoking the CPU's PTE whilst the mutex is held. The next user
1982 	 * pagefault then has to wait until we release the mutex.
1983 	 *
1984 	 * Note that RPM complicates matters somewhat by adding an additional
1985 	 * requirement that operations to the GGTT be made while holding the RPM
1986 	 * wakeref.
1987 	 */
1988 	lockdep_assert_held(&i915->drm.struct_mutex);
1989 	intel_runtime_pm_get(i915);
1990 
1991 	if (list_empty(&obj->userfault_link))
1992 		goto out;
1993 
1994 	list_del_init(&obj->userfault_link);
1995 	drm_vma_node_unmap(&obj->base.vma_node,
1996 			   obj->base.dev->anon_inode->i_mapping);
1997 
1998 	/* Ensure that the CPU's PTEs are revoked and that there are no
1999 	 * outstanding memory transactions from userspace before we return.
2000 	 * The TLB flushing implied by changing the PTEs above *should* be
2001 	 * sufficient; an extra barrier here just provides us with a bit
2002 	 * of paranoid documentation about our requirement to serialise
2003 	 * memory writes before touching registers / GSM.
2004 	 */
2005 	wmb();
2006 
2007 out:
2008 	intel_runtime_pm_put(i915);
2009 }
2010 
2011 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2012 {
2013 	struct drm_i915_gem_object *obj, *on;
2014 	int i;
2015 
2016 	/*
2017 	 * Only called during RPM suspend. All users of the userfault_list
2018 	 * must be holding an RPM wakeref to ensure that this cannot
2019 	 * run concurrently with themselves (and use the struct_mutex for
2020 	 * protection between themselves).
2021 	 */
2022 
2023 	list_for_each_entry_safe(obj, on,
2024 				 &dev_priv->mm.userfault_list, userfault_link) {
2025 		list_del_init(&obj->userfault_link);
2026 		drm_vma_node_unmap(&obj->base.vma_node,
2027 				   obj->base.dev->anon_inode->i_mapping);
2028 	}
2029 
2030 	/* The fence registers will be lost when the device powers down. If any were
2031 	 * in use by hardware (i.e. they are pinned), we should not be powering
2032 	 * down! All other fences will be reacquired by the user upon waking.
2033 	 */
2034 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
2035 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2036 
2037 		if (WARN_ON(reg->pin_count))
2038 			continue;
2039 
2040 		if (!reg->vma)
2041 			continue;
2042 
2043 		GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
2044 		reg->dirty = true;
2045 	}
2046 }
2047 
2048 /**
2049  * i915_gem_get_ggtt_size - return required global GTT size for an object
2050  * @dev_priv: i915 device
2051  * @size: object size
2052  * @tiling_mode: tiling mode
2053  *
2054  * Return the required global GTT size for an object, taking into account
2055  * potential fence register mapping.
2056  */
2057 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
2058 			   u64 size, int tiling_mode)
2059 {
2060 	u64 ggtt_size;
2061 
2062 	GEM_BUG_ON(size == 0);
2063 
2064 	if (INTEL_GEN(dev_priv) >= 4 ||
2065 	    tiling_mode == I915_TILING_NONE)
2066 		return size;
2067 
2068 	/* Previous chips need a power-of-two fence region when tiling */
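	/* For example (illustrative): a 1.25 MiB tiled object on gen3 starts
	 * from the 1 MiB minimum below and is rounded up to a 2 MiB fence
	 * region; on gen2 a 600 KiB object rounds up from 512 KiB to 1 MiB.
	 */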
2069 	if (IS_GEN3(dev_priv))
2070 		ggtt_size = 1024*1024;
2071 	else
2072 		ggtt_size = 512*1024;
2073 
2074 	while (ggtt_size < size)
2075 		ggtt_size <<= 1;
2076 
2077 	return ggtt_size;
2078 }
2079 
2080 /**
2081  * i915_gem_get_ggtt_alignment - return required global GTT alignment
2082  * @dev_priv: i915 device
2083  * @size: object size
2084  * @tiling_mode: tiling mode
2085  * @fenced: is fenced alignment required or not
2086  *
2087  * Return the required global GTT alignment for an object, taking into account
2088  * potential fence register mapping.
2089  */
2090 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2091 				int tiling_mode, bool fenced)
2092 {
2093 	GEM_BUG_ON(size == 0);
2094 
2095 	/*
2096 	 * Minimum alignment is 4k (GTT page size), but might be greater
2097 	 * if a fence register is needed for the object.
2098 	 */
2099 	if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2100 	    tiling_mode == I915_TILING_NONE)
2101 		return 4096;
2102 
2103 	/*
2104 	 * Previous chips need to be aligned to the size of the smallest
2105 	 * fence register that can contain the object.
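	 * For example (illustrative), a 700 KiB tiled buffer on gen2 needs a
	 * 1 MiB fence region and hence 1 MiB alignment.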
2106 	 */
2107 	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2108 }
2109 
2110 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2111 {
2112 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2113 	int err;
2114 
2115 	err = drm_gem_create_mmap_offset(&obj->base);
2116 	if (!err)
2117 		return 0;
2118 
2119 	/* We can idle the GPU locklessly to flush stale objects, but in order
2120 	 * to claim that space for ourselves, we need to take the big
2121 	 * struct_mutex to free the requests+objects and allocate our slot.
2122 	 */
2123 	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2124 	if (err)
2125 		return err;
2126 
2127 	err = i915_mutex_lock_interruptible(&dev_priv->drm);
2128 	if (!err) {
2129 		i915_gem_retire_requests(dev_priv);
2130 		err = drm_gem_create_mmap_offset(&obj->base);
2131 		mutex_unlock(&dev_priv->drm.struct_mutex);
2132 	}
2133 
2134 	return err;
2135 }
2136 
2137 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2138 {
2139 	drm_gem_free_mmap_offset(&obj->base);
2140 }
2141 
2142 int
2143 i915_gem_mmap_gtt(struct drm_file *file,
2144 		  struct drm_device *dev,
2145 		  uint32_t handle,
2146 		  uint64_t *offset)
2147 {
2148 	struct drm_i915_gem_object *obj;
2149 	int ret;
2150 
2151 	obj = i915_gem_object_lookup(file, handle);
2152 	if (!obj)
2153 		return -ENOENT;
2154 
2155 	ret = i915_gem_object_create_mmap_offset(obj);
2156 	if (ret == 0)
2157 		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2158 
2159 	i915_gem_object_put(obj);
2160 	return ret;
2161 }
2162 
2163 /**
2164  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2165  * @dev: DRM device
2166  * @data: GTT mapping ioctl data
2167  * @file: GEM object info
2168  *
2169  * Simply returns the fake offset to userspace so it can mmap it.
2170  * The mmap call will end up in drm_gem_mmap(), which will set things
2171  * up so we can get faults in the handler above.
2172  *
2173  * The fault handler will take care of binding the object into the GTT
2174  * (since it may have been evicted to make room for something), allocating
2175  * a fence register, and mapping the appropriate aperture address into
2176  * userspace.
2177  */
2178 int
2179 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2180 			struct drm_file *file)
2181 {
2182 	struct drm_i915_gem_mmap_gtt *args = data;
2183 
2184 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2185 }
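/*
 * Illustrative sketch (not part of the driver) of the userspace side, where
 * fd is an open drm file descriptor and handle/size describe an existing
 * GEM object:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * The mmap() of the fake offset on the drm fd is what eventually lands in
 * i915_gem_fault() above.
 */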
2186 
2187 /* Immediately discard the backing storage */
2188 static void
2189 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2190 {
2191 	i915_gem_object_free_mmap_offset(obj);
2192 
2193 	if (obj->base.filp == NULL)
2194 		return;
2195 
2196 	/* Our goal here is to return as much of the memory as
2197 	 * possible back to the system, as we are called from OOM.
2198 	 * To do this we must instruct the shmfs to drop all of its
2199 	 * backing pages, *now*.
2200 	 */
2201 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2202 	obj->mm.madv = __I915_MADV_PURGED;
2203 }
2204 
2205 /* Try to discard unwanted pages */
2206 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2207 {
2208 	struct address_space *mapping;
2209 
2210 	lockdep_assert_held(&obj->mm.lock);
2211 	GEM_BUG_ON(obj->mm.pages);
2212 
2213 	switch (obj->mm.madv) {
2214 	case I915_MADV_DONTNEED:
2215 		i915_gem_object_truncate(obj);
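		/* fall through - truncation marks the object as purged */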
2216 	case __I915_MADV_PURGED:
2217 		return;
2218 	}
2219 
2220 	if (obj->base.filp == NULL)
2221 		return;
2222 
2223 	mapping = obj->base.filp->f_mapping;
2224 	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2225 }
2226 
2227 static void
2228 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2229 			      struct sg_table *pages)
2230 {
2231 	struct sgt_iter sgt_iter;
2232 	struct page *page;
2233 
2234 	__i915_gem_object_release_shmem(obj, pages);
2235 
2236 	i915_gem_gtt_finish_pages(obj, pages);
2237 
2238 	if (i915_gem_object_needs_bit17_swizzle(obj))
2239 		i915_gem_object_save_bit_17_swizzle(obj, pages);
2240 
2241 	for_each_sgt_page(page, sgt_iter, pages) {
2242 		if (obj->mm.dirty)
2243 			set_page_dirty(page);
2244 
2245 		if (obj->mm.madv == I915_MADV_WILLNEED)
2246 			mark_page_accessed(page);
2247 
2248 		put_page(page);
2249 	}
2250 	obj->mm.dirty = false;
2251 
2252 	sg_free_table(pages);
2253 	kfree(pages);
2254 }
2255 
2256 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2257 {
2258 	struct radix_tree_iter iter;
2259 	void **slot;
2260 
2261 	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2262 		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2263 }
2264 
2265 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2266 				 enum i915_mm_subclass subclass)
2267 {
2268 	struct sg_table *pages;
2269 
2270 	if (i915_gem_object_has_pinned_pages(obj))
2271 		return;
2272 
2273 	GEM_BUG_ON(obj->bind_count);
2274 	if (!READ_ONCE(obj->mm.pages))
2275 		return;
2276 
2277 	/* May be called by shrinker from within get_pages() (on another bo) */
2278 	mutex_lock_nested(&obj->mm.lock, subclass);
2279 	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2280 		goto unlock;
2281 
2282 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2283 	 * array, hence protect the pages from being reaped by removing them
2284 	 * from the gtt lists early. */
2285 	pages = fetch_and_zero(&obj->mm.pages);
2286 	GEM_BUG_ON(!pages);
2287 
2288 	if (obj->mm.mapping) {
2289 		void *ptr;
2290 
2291 		ptr = ptr_mask_bits(obj->mm.mapping);
2292 		if (is_vmalloc_addr(ptr))
2293 			vunmap(ptr);
2294 		else
2295 			kunmap(kmap_to_page(ptr));
2296 
2297 		obj->mm.mapping = NULL;
2298 	}
2299 
2300 	__i915_gem_object_reset_page_iter(obj);
2301 
2302 	obj->ops->put_pages(obj, pages);
2303 unlock:
2304 	mutex_unlock(&obj->mm.lock);
2305 }
2306 
2307 static unsigned int swiotlb_max_size(void)
2308 {
2309 #if IS_ENABLED(CONFIG_SWIOTLB)
2310 	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2311 #else
2312 	return 0;
2313 #endif
2314 }
2315 
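/* Repack a scatterlist that was allocated for the worst case (one entry per
 * page) into a right-sized table containing only the entries actually used,
 * freeing the original oversized allocation.
 */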
2316 static void i915_sg_trim(struct sg_table *orig_st)
2317 {
2318 	struct sg_table new_st;
2319 	struct scatterlist *sg, *new_sg;
2320 	unsigned int i;
2321 
2322 	if (orig_st->nents == orig_st->orig_nents)
2323 		return;
2324 
2325 	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL))
2326 		return;
2327 
2328 	new_sg = new_st.sgl;
2329 	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2330 		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2331 		/* called before being DMA mapped, no need to copy sg->dma_* */
2332 		new_sg = sg_next(new_sg);
2333 	}
2334 
2335 	sg_free_table(orig_st);
2336 
2337 	*orig_st = new_st;
2338 }
2339 
2340 static struct sg_table *
2341 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2342 {
2343 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2344 	const unsigned long page_count = obj->base.size / PAGE_SIZE;
2345 	unsigned long i;
2346 	struct address_space *mapping;
2347 	struct sg_table *st;
2348 	struct scatterlist *sg;
2349 	struct sgt_iter sgt_iter;
2350 	struct page *page;
2351 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2352 	unsigned int max_segment;
2353 	int ret;
2354 	gfp_t gfp;
2355 
2356 	/* Assert that the object is not currently in any GPU domain. As it
2357 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2358 	 * a GPU cache
2359 	 */
2360 	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2361 	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2362 
2363 	max_segment = swiotlb_max_size();
2364 	if (!max_segment)
2365 		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
2366 
2367 	st = kmalloc(sizeof(*st), GFP_KERNEL);
2368 	if (st == NULL)
2369 		return ERR_PTR(-ENOMEM);
2370 
2371 rebuild_st:
2372 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2373 		kfree(st);
2374 		return ERR_PTR(-ENOMEM);
2375 	}
2376 
2377 	/* Get the list of pages out of our struct file.  They'll be pinned
2378 	 * at this point until we release them.
2379 	 *
2380 	 * Fail silently without starting the shrinker
2381 	 */
2382 	mapping = obj->base.filp->f_mapping;
2383 	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2384 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
2385 	sg = st->sgl;
2386 	st->nents = 0;
2387 	for (i = 0; i < page_count; i++) {
2388 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2389 		if (IS_ERR(page)) {
2390 			i915_gem_shrink(dev_priv,
2391 					page_count,
2392 					I915_SHRINK_BOUND |
2393 					I915_SHRINK_UNBOUND |
2394 					I915_SHRINK_PURGEABLE);
2395 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2396 		}
2397 		if (IS_ERR(page)) {
2398 			/* We've tried hard to allocate the memory by reaping
2399 			 * our own buffer, now let the real VM do its job and
2400 			 * go down in flames if truly OOM.
2401 			 */
2402 			page = shmem_read_mapping_page(mapping, i);
2403 			if (IS_ERR(page)) {
2404 				ret = PTR_ERR(page);
2405 				goto err_sg;
2406 			}
2407 		}
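		/* Coalesce physically contiguous pages into a single sg entry:
		 * start a new entry for the first page, whenever the current
		 * entry would exceed max_segment, or when the pages are not
		 * contiguous; otherwise just extend the current entry.
		 */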
2408 		if (!i ||
2409 		    sg->length >= max_segment ||
2410 		    page_to_pfn(page) != last_pfn + 1) {
2411 			if (i)
2412 				sg = sg_next(sg);
2413 			st->nents++;
2414 			sg_set_page(sg, page, PAGE_SIZE, 0);
2415 		} else {
2416 			sg->length += PAGE_SIZE;
2417 		}
2418 		last_pfn = page_to_pfn(page);
2419 
2420 		/* Check that the i965g/gm workaround works. */
2421 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2422 	}
2423 	if (sg) /* loop terminated early; short sg table */
2424 		sg_mark_end(sg);
2425 
2426 	/* Trim unused sg entries to avoid wasting memory. */
2427 	i915_sg_trim(st);
2428 
2429 	ret = i915_gem_gtt_prepare_pages(obj, st);
2430 	if (ret) {
2431 		/* DMA remapping failed? One possible cause is that
2432 		 * it could not reserve enough large entries; asking
2433 		 * for PAGE_SIZE chunks instead may be helpful.
2434 		 */
2435 		if (max_segment > PAGE_SIZE) {
2436 			for_each_sgt_page(page, sgt_iter, st)
2437 				put_page(page);
2438 			sg_free_table(st);
2439 
2440 			max_segment = PAGE_SIZE;
2441 			goto rebuild_st;
2442 		} else {
2443 			dev_warn(&dev_priv->drm.pdev->dev,
2444 				 "Failed to DMA remap %lu pages\n",
2445 				 page_count);
2446 			goto err_pages;
2447 		}
2448 	}
2449 
2450 	if (i915_gem_object_needs_bit17_swizzle(obj))
2451 		i915_gem_object_do_bit_17_swizzle(obj, st);
2452 
2453 	return st;
2454 
2455 err_sg:
2456 	sg_mark_end(sg);
2457 err_pages:
2458 	for_each_sgt_page(page, sgt_iter, st)
2459 		put_page(page);
2460 	sg_free_table(st);
2461 	kfree(st);
2462 
2463 	/* shmemfs first checks if there is enough memory to allocate the page
2464 	 * and reports ENOSPC should there be insufficient memory, along with the usual
2465 	 * ENOMEM for a genuine allocation failure.
2466 	 *
2467 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2468 	 * space and so want to translate the error from shmemfs back to our
2469 	 * usual understanding of ENOMEM.
2470 	 */
2471 	if (ret == -ENOSPC)
2472 		ret = -ENOMEM;
2473 
2474 	return ERR_PTR(ret);
2475 }
2476 
2477 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2478 				 struct sg_table *pages)
2479 {
2480 	lockdep_assert_held(&obj->mm.lock);
2481 
2482 	obj->mm.get_page.sg_pos = pages->sgl;
2483 	obj->mm.get_page.sg_idx = 0;
2484 
2485 	obj->mm.pages = pages;
2486 
2487 	if (i915_gem_object_is_tiled(obj) &&
2488 	    to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2489 		GEM_BUG_ON(obj->mm.quirked);
2490 		__i915_gem_object_pin_pages(obj);
2491 		obj->mm.quirked = true;
2492 	}
2493 }
2494 
2495 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2496 {
2497 	struct sg_table *pages;
2498 
2499 	GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2500 
2501 	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2502 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2503 		return -EFAULT;
2504 	}
2505 
2506 	pages = obj->ops->get_pages(obj);
2507 	if (unlikely(IS_ERR(pages)))
2508 		return PTR_ERR(pages);
2509 
2510 	__i915_gem_object_set_pages(obj, pages);
2511 	return 0;
2512 }
2513 
2514 /* Ensure that the associated pages are gathered from the backing storage
2515  * and pinned into our object. i915_gem_object_pin_pages() may be called
2516  * multiple times; each call must be balanced by a matching call to
2517  * i915_gem_object_unpin_pages(). The pages are only released once they are
2518  * no longer referenced, either as a result of memory pressure (reaping
2519  * pages under the shrinker) or as the object is itself released.
2520  */
2521 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2522 {
2523 	int err;
2524 
2525 	err = mutex_lock_interruptible(&obj->mm.lock);
2526 	if (err)
2527 		return err;
2528 
2529 	if (unlikely(!obj->mm.pages)) {
2530 		err = ____i915_gem_object_get_pages(obj);
2531 		if (err)
2532 			goto unlock;
2533 
2534 		smp_mb__before_atomic();
2535 	}
2536 	atomic_inc(&obj->mm.pages_pin_count);
2537 
2538 unlock:
2539 	mutex_unlock(&obj->mm.lock);
2540 	return err;
2541 }
2542 
2543 /* The 'mapping' part of i915_gem_object_pin_map() below */
2544 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2545 				 enum i915_map_type type)
2546 {
2547 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2548 	struct sg_table *sgt = obj->mm.pages;
2549 	struct sgt_iter sgt_iter;
2550 	struct page *page;
2551 	struct page *stack_pages[32];
2552 	struct page **pages = stack_pages;
2553 	unsigned long i = 0;
2554 	pgprot_t pgprot;
2555 	void *addr;
2556 
2557 	/* A single page can always be kmapped */
2558 	if (n_pages == 1 && type == I915_MAP_WB)
2559 		return kmap(sg_page(sgt->sgl));
2560 
2561 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2562 		/* Too big for stack -- allocate temporary array instead */
2563 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2564 		if (!pages)
2565 			return NULL;
2566 	}
2567 
2568 	for_each_sgt_page(page, sgt_iter, sgt)
2569 		pages[i++] = page;
2570 
2571 	/* Check that we have the expected number of pages */
2572 	GEM_BUG_ON(i != n_pages);
2573 
2574 	switch (type) {
2575 	case I915_MAP_WB:
2576 		pgprot = PAGE_KERNEL;
2577 		break;
2578 	case I915_MAP_WC:
2579 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2580 		break;
2581 	}
2582 	addr = vmap(pages, n_pages, 0, pgprot);
2583 
2584 	if (pages != stack_pages)
2585 		drm_free_large(pages);
2586 
2587 	return addr;
2588 }
2589 
2590 /* get, pin, and map the pages of the object into kernel space */
2591 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2592 			      enum i915_map_type type)
2593 {
2594 	enum i915_map_type has_type;
2595 	bool pinned;
2596 	void *ptr;
2597 	int ret;
2598 
2599 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2600 
2601 	ret = mutex_lock_interruptible(&obj->mm.lock);
2602 	if (ret)
2603 		return ERR_PTR(ret);
2604 
2605 	pinned = true;
2606 	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2607 		if (unlikely(!obj->mm.pages)) {
2608 			ret = ____i915_gem_object_get_pages(obj);
2609 			if (ret)
2610 				goto err_unlock;
2611 
2612 			smp_mb__before_atomic();
2613 		}
2614 		atomic_inc(&obj->mm.pages_pin_count);
2615 		pinned = false;
2616 	}
2617 	GEM_BUG_ON(!obj->mm.pages);
2618 
2619 	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
2620 	if (ptr && has_type != type) {
2621 		if (pinned) {
2622 			ret = -EBUSY;
2623 			goto err_unpin;
2624 		}
2625 
2626 		if (is_vmalloc_addr(ptr))
2627 			vunmap(ptr);
2628 		else
2629 			kunmap(kmap_to_page(ptr));
2630 
2631 		ptr = obj->mm.mapping = NULL;
2632 	}
2633 
2634 	if (!ptr) {
2635 		ptr = i915_gem_object_map(obj, type);
2636 		if (!ptr) {
2637 			ret = -ENOMEM;
2638 			goto err_unpin;
2639 		}
2640 
2641 		obj->mm.mapping = ptr_pack_bits(ptr, type);
2642 	}
2643 
2644 out_unlock:
2645 	mutex_unlock(&obj->mm.lock);
2646 	return ptr;
2647 
2648 err_unpin:
2649 	atomic_dec(&obj->mm.pages_pin_count);
2650 err_unlock:
2651 	ptr = ERR_PTR(ret);
2652 	goto out_unlock;
2653 }
2654 
2655 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2656 {
2657 	unsigned long elapsed;
2658 
2659 	if (ctx->hang_stats.banned)
2660 		return true;
2661 
2662 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2663 	if (ctx->hang_stats.ban_period_seconds &&
2664 	    elapsed <= ctx->hang_stats.ban_period_seconds) {
2665 		DRM_DEBUG("context hanging too fast, banning!\n");
2666 		return true;
2667 	}
2668 
2669 	return false;
2670 }
2671 
2672 static void i915_set_reset_status(struct i915_gem_context *ctx,
2673 				  const bool guilty)
2674 {
2675 	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2676 
2677 	if (guilty) {
2678 		hs->banned = i915_context_is_banned(ctx);
2679 		hs->batch_active++;
2680 		hs->guilty_ts = get_seconds();
2681 	} else {
2682 		hs->batch_pending++;
2683 	}
2684 }
2685 
2686 struct drm_i915_gem_request *
2687 i915_gem_find_active_request(struct intel_engine_cs *engine)
2688 {
2689 	struct drm_i915_gem_request *request;
2690 
2691 	/* We are called by the error capture and reset at a random
2692 	 * point in time. In particular, note that neither is crucially
2693 	 * ordered with an interrupt. After a hang, the GPU is dead and we
2694 	 * assume that no more writes can happen (we waited long enough for
2695  * all writes that were in flight to be flushed) - adding an
2696 	 * extra delay for a recent interrupt is pointless. Hence, we do
2697 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2698 	 */
2699 	list_for_each_entry(request, &engine->timeline->requests, link) {
2700 		if (__i915_gem_request_completed(request))
2701 			continue;
2702 
2703 		return request;
2704 	}
2705 
2706 	return NULL;
2707 }
2708 
2709 static void reset_request(struct drm_i915_gem_request *request)
2710 {
2711 	void *vaddr = request->ring->vaddr;
2712 	u32 head;
2713 
2714 	/* As this request likely depends on state from the lost
2715 	 * context, clear out all the user operations leaving the
2716 	 * breadcrumb at the end (so we get the fence notifications).
2717 	 */
2718 	head = request->head;
2719 	if (request->postfix < head) {
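		/* The request wraps past the end of the ring: scrub up to the
		 * end first, then continue from the start of the ring below.
		 */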
2720 		memset(vaddr + head, 0, request->ring->size - head);
2721 		head = 0;
2722 	}
2723 	memset(vaddr + head, 0, request->postfix - head);
2724 }
2725 
2726 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2727 {
2728 	struct drm_i915_gem_request *request;
2729 	struct i915_gem_context *incomplete_ctx;
2730 	struct intel_timeline *timeline;
2731 	bool ring_hung;
2732 
2733 	if (engine->irq_seqno_barrier)
2734 		engine->irq_seqno_barrier(engine);
2735 
2736 	request = i915_gem_find_active_request(engine);
2737 	if (!request)
2738 		return;
2739 
2740 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2741 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2742 		ring_hung = false;
2743 
2744 	i915_set_reset_status(request->ctx, ring_hung);
2745 	if (!ring_hung)
2746 		return;
2747 
2748 	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2749 			 engine->name, request->global_seqno);
2750 
2751 	/* Setup the CS to resume from the breadcrumb of the hung request */
2752 	engine->reset_hw(engine, request);
2753 
2754 	/* Users of the default context do not rely on logical state
2755 	 * preserved between batches. They have to emit full state on
2756 	 * every batch and so it is safe to execute queued requests following
2757 	 * the hang.
2758 	 *
2759 	 * Other contexts preserve state, which is now corrupt. We want to skip all
2760 	 * queued requests that reference the corrupt context.
2761 	 */
2762 	incomplete_ctx = request->ctx;
2763 	if (i915_gem_context_is_default(incomplete_ctx))
2764 		return;
2765 
2766 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
2767 		if (request->ctx == incomplete_ctx)
2768 			reset_request(request);
2769 
2770 	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
2771 	list_for_each_entry(request, &timeline->requests, link)
2772 		reset_request(request);
2773 }
2774 
2775 void i915_gem_reset(struct drm_i915_private *dev_priv)
2776 {
2777 	struct intel_engine_cs *engine;
2778 	enum intel_engine_id id;
2779 
2780 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2781 
2782 	i915_gem_retire_requests(dev_priv);
2783 
2784 	for_each_engine(engine, dev_priv, id)
2785 		i915_gem_reset_engine(engine);
2786 
2787 	i915_gem_restore_fences(dev_priv);
2788 
2789 	if (dev_priv->gt.awake) {
2790 		intel_sanitize_gt_powersave(dev_priv);
2791 		intel_enable_gt_powersave(dev_priv);
2792 		if (INTEL_GEN(dev_priv) >= 6)
2793 			gen6_rps_busy(dev_priv);
2794 	}
2795 }
2796 
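/* Once the GPU has been declared wedged, requests are no longer executed;
 * they are "submitted" by immediately advancing the engine seqno so that
 * any waiters see them as complete.
 */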
2797 static void nop_submit_request(struct drm_i915_gem_request *request)
2798 {
2799 	i915_gem_request_submit(request);
2800 	intel_engine_init_global_seqno(request->engine, request->global_seqno);
2801 }
2802 
2803 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2804 {
2805 	engine->submit_request = nop_submit_request;
2806 
2807 	/* Mark all pending requests as complete so that any concurrent
2808 	 * (lockless) lookup doesn't try and wait upon the request as we
2809 	 * reset it.
2810 	 */
2811 	intel_engine_init_global_seqno(engine,
2812 				       intel_engine_last_submit(engine));
2813 
2814 	/*
2815 	 * Clear the execlists queue up before freeing the requests, as those
2816 	 * are the ones that keep the context and ringbuffer backing objects
2817 	 * pinned in place.
2818 	 */
2819 
2820 	if (i915.enable_execlists) {
2821 		unsigned long flags;
2822 
2823 		spin_lock_irqsave(&engine->timeline->lock, flags);
2824 
2825 		i915_gem_request_put(engine->execlist_port[0].request);
2826 		i915_gem_request_put(engine->execlist_port[1].request);
2827 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2828 		engine->execlist_queue = RB_ROOT;
2829 		engine->execlist_first = NULL;
2830 
2831 		spin_unlock_irqrestore(&engine->timeline->lock, flags);
2832 	}
2833 }
2834 
2835 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2836 {
2837 	struct intel_engine_cs *engine;
2838 	enum intel_engine_id id;
2839 
2840 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2841 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2842 
2843 	i915_gem_context_lost(dev_priv);
2844 	for_each_engine(engine, dev_priv, id)
2845 		i915_gem_cleanup_engine(engine);
2846 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2847 
2848 	i915_gem_retire_requests(dev_priv);
2849 }
2850 
2851 static void
2852 i915_gem_retire_work_handler(struct work_struct *work)
2853 {
2854 	struct drm_i915_private *dev_priv =
2855 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
2856 	struct drm_device *dev = &dev_priv->drm;
2857 
2858 	/* Come back later if the device is busy... */
2859 	if (mutex_trylock(&dev->struct_mutex)) {
2860 		i915_gem_retire_requests(dev_priv);
2861 		mutex_unlock(&dev->struct_mutex);
2862 	}
2863 
2864 	/* Keep the retire handler running until we are finally idle.
2865 	 * We do not need to do this test under locking as in the worst-case
2866 	 * we queue the retire worker once too often.
2867 	 */
2868 	if (READ_ONCE(dev_priv->gt.awake)) {
2869 		i915_queue_hangcheck(dev_priv);
2870 		queue_delayed_work(dev_priv->wq,
2871 				   &dev_priv->gt.retire_work,
2872 				   round_jiffies_up_relative(HZ));
2873 	}
2874 }
2875 
2876 static void
2877 i915_gem_idle_work_handler(struct work_struct *work)
2878 {
2879 	struct drm_i915_private *dev_priv =
2880 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
2881 	struct drm_device *dev = &dev_priv->drm;
2882 	struct intel_engine_cs *engine;
2883 	enum intel_engine_id id;
2884 	bool rearm_hangcheck;
2885 
2886 	if (!READ_ONCE(dev_priv->gt.awake))
2887 		return;
2888 
2889 	/*
2890 	 * Wait for the last execlists context to complete, but bail out in case a
2891 	 * new request is submitted.
2892 	 */
2893 	wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
2894 		 intel_execlists_idle(dev_priv), 10);
2895 
2896 	if (READ_ONCE(dev_priv->gt.active_requests))
2897 		return;
2898 
2899 	rearm_hangcheck =
2900 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2901 
2902 	if (!mutex_trylock(&dev->struct_mutex)) {
2903 		/* Currently busy, come back later */
2904 		mod_delayed_work(dev_priv->wq,
2905 				 &dev_priv->gt.idle_work,
2906 				 msecs_to_jiffies(50));
2907 		goto out_rearm;
2908 	}
2909 
2910 	/*
2911 	 * New request retired after this work handler started, extend active
2912 	 * period until next instance of the work.
2913 	 */
2914 	if (work_pending(work))
2915 		goto out_unlock;
2916 
2917 	if (dev_priv->gt.active_requests)
2918 		goto out_unlock;
2919 
2920 	if (wait_for(intel_execlists_idle(dev_priv), 10))
2921 		DRM_ERROR("Timeout waiting for engines to idle\n");
2922 
2923 	for_each_engine(engine, dev_priv, id)
2924 		i915_gem_batch_pool_fini(&engine->batch_pool);
2925 
2926 	GEM_BUG_ON(!dev_priv->gt.awake);
2927 	dev_priv->gt.awake = false;
2928 	rearm_hangcheck = false;
2929 
2930 	if (INTEL_GEN(dev_priv) >= 6)
2931 		gen6_rps_idle(dev_priv);
2932 	intel_runtime_pm_put(dev_priv);
2933 out_unlock:
2934 	mutex_unlock(&dev->struct_mutex);
2935 
2936 out_rearm:
2937 	if (rearm_hangcheck) {
2938 		GEM_BUG_ON(!dev_priv->gt.awake);
2939 		i915_queue_hangcheck(dev_priv);
2940 	}
2941 }
2942 
2943 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2944 {
2945 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
2946 	struct drm_i915_file_private *fpriv = file->driver_priv;
2947 	struct i915_vma *vma, *vn;
2948 
2949 	mutex_lock(&obj->base.dev->struct_mutex);
2950 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2951 		if (vma->vm->file == fpriv)
2952 			i915_vma_close(vma);
2953 
2954 	if (i915_gem_object_is_active(obj) &&
2955 	    !i915_gem_object_has_active_reference(obj)) {
2956 		i915_gem_object_set_active_reference(obj);
2957 		i915_gem_object_get(obj);
2958 	}
2959 	mutex_unlock(&obj->base.dev->struct_mutex);
2960 }
2961 
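/* Convert the user supplied timeout: a negative value means wait
 * indefinitely, zero means do not wait at all (just report busyness).
 */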
2962 static unsigned long to_wait_timeout(s64 timeout_ns)
2963 {
2964 	if (timeout_ns < 0)
2965 		return MAX_SCHEDULE_TIMEOUT;
2966 
2967 	if (timeout_ns == 0)
2968 		return 0;
2969 
2970 	return nsecs_to_jiffies_timeout(timeout_ns);
2971 }
2972 
2973 /**
2974  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2975  * @dev: drm device pointer
2976  * @data: ioctl data blob
2977  * @file: drm file pointer
2978  *
2979  * Returns 0 if successful, else an error is returned with the remaining time in
2980  * the timeout parameter.
2981  *  -ETIME: object is still busy after timeout
2982  *  -ERESTARTSYS: signal interrupted the wait
2983  *  -ENOENT: object doesn't exist
2984  * Also possible, but rare:
2985  *  -EAGAIN: GPU wedged
2986  *  -ENOMEM: out of memory
2987  *  -ENODEV: Internal IRQ fail
2988  *  -E?: The add request failed
2989  *
2990  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2991  * non-zero timeout parameter the wait ioctl will wait for the given number of
2992  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2993  * without holding struct_mutex the object may become re-busied before this
2994  * function completes. A similar but shorter race condition exists in the busy
2995  * ioctl.
2996  */
2997 int
2998 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2999 {
3000 	struct drm_i915_gem_wait *args = data;
3001 	struct drm_i915_gem_object *obj;
3002 	ktime_t start;
3003 	long ret;
3004 
3005 	if (args->flags != 0)
3006 		return -EINVAL;
3007 
3008 	obj = i915_gem_object_lookup(file, args->bo_handle);
3009 	if (!obj)
3010 		return -ENOENT;
3011 
3012 	start = ktime_get();
3013 
3014 	ret = i915_gem_object_wait(obj,
3015 				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
3016 				   to_wait_timeout(args->timeout_ns),
3017 				   to_rps_client(file));
3018 
3019 	if (args->timeout_ns > 0) {
3020 		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3021 		if (args->timeout_ns < 0)
3022 			args->timeout_ns = 0;
3023 	}
3024 
3025 	i915_gem_object_put(obj);
3026 	return ret;
3027 }
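/*
 * Illustrative sketch (not part of the driver) of a userspace wait on a GEM
 * object, passing a negative timeout to wait indefinitely:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = -1,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 */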
3028 
3029 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
3030 {
3031 	int ret, i;
3032 
3033 	for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3034 		ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3035 		if (ret)
3036 			return ret;
3037 	}
3038 
3039 	return 0;
3040 }
3041 
3042 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3043 {
3044 	int ret;
3045 
3046 	if (flags & I915_WAIT_LOCKED) {
3047 		struct i915_gem_timeline *tl;
3048 
3049 		lockdep_assert_held(&i915->drm.struct_mutex);
3050 
3051 		list_for_each_entry(tl, &i915->gt.timelines, link) {
3052 			ret = wait_for_timeline(tl, flags);
3053 			if (ret)
3054 				return ret;
3055 		}
3056 	} else {
3057 		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
3058 		if (ret)
3059 			return ret;
3060 	}
3061 
3062 	return 0;
3063 }
3064 
3065 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3066 			     bool force)
3067 {
3068 	/* If we don't have a page list set up, then we're not pinned
3069 	 * to the GPU, and we can ignore the cache flush because it'll happen
3070 	 * again at bind time.
3071 	 */
3072 	if (!obj->mm.pages)
3073 		return;
3074 
3075 	/*
3076 	 * Stolen memory is always coherent with the GPU as it is explicitly
3077 	 * marked as wc by the system, or the system is cache-coherent.
3078 	 */
3079 	if (obj->stolen || obj->phys_handle)
3080 		return;
3081 
3082 	/* If the GPU is snooping the contents of the CPU cache,
3083 	 * we do not need to manually clear the CPU cache lines.  However,
3084 	 * the caches are only snooped when the render cache is
3085 	 * flushed/invalidated.  As we always have to emit invalidations
3086 	 * and flushes when moving into and out of the RENDER domain, correct
3087 	 * snooping behaviour occurs naturally as the result of our domain
3088 	 * tracking.
3089 	 */
3090 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3091 		obj->cache_dirty = true;
3092 		return;
3093 	}
3094 
3095 	trace_i915_gem_object_clflush(obj);
3096 	drm_clflush_sg(obj->mm.pages);
3097 	obj->cache_dirty = false;
3098 }
3099 
3100 /** Flushes the GTT write domain for the object if it's dirty. */
3101 static void
3102 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3103 {
3104 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3105 
3106 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3107 		return;
3108 
3109 	/* No actual flushing is required for the GTT write domain.  Writes
3110 	 * to it "immediately" go to main memory as far as we know, so there's
3111 	 * no chipset flush.  It also doesn't land in render cache.
3112 	 *
3113 	 * However, we do have to enforce the order so that all writes through
3114 	 * the GTT land before any writes to the device, such as updates to
3115 	 * the GATT itself.
3116 	 *
3117 	 * We also have to wait a bit for the writes to land from the GTT.
3118 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3119 	 * timing. This issue has only been observed when switching quickly
3120 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
3121 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3122 	 * system agents we cannot reproduce this behaviour).
3123 	 */
3124 	wmb();
3125 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3126 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3127 
3128 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3129 
3130 	obj->base.write_domain = 0;
3131 	trace_i915_gem_object_change_domain(obj,
3132 					    obj->base.read_domains,
3133 					    I915_GEM_DOMAIN_GTT);
3134 }
3135 
3136 /** Flushes the CPU write domain for the object if it's dirty. */
3137 static void
3138 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3139 {
3140 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3141 		return;
3142 
3143 	i915_gem_clflush_object(obj, obj->pin_display);
3144 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3145 
3146 	obj->base.write_domain = 0;
3147 	trace_i915_gem_object_change_domain(obj,
3148 					    obj->base.read_domains,
3149 					    I915_GEM_DOMAIN_CPU);
3150 }
3151 
3152 /**
3153  * Moves a single object to the GTT read, and possibly write domain.
3154  * @obj: object to act on
3155  * @write: ask for write access or read only
3156  *
3157  * This function returns when the move is complete, including waiting on
3158  * flushes to occur.
3159  */
3160 int
3161 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3162 {
3163 	uint32_t old_write_domain, old_read_domains;
3164 	int ret;
3165 
3166 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3167 
3168 	ret = i915_gem_object_wait(obj,
3169 				   I915_WAIT_INTERRUPTIBLE |
3170 				   I915_WAIT_LOCKED |
3171 				   (write ? I915_WAIT_ALL : 0),
3172 				   MAX_SCHEDULE_TIMEOUT,
3173 				   NULL);
3174 	if (ret)
3175 		return ret;
3176 
3177 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3178 		return 0;
3179 
3180 	/* Flush and acquire obj->pages so that we are coherent through
3181 	 * direct access in memory with previous cached writes through
3182 	 * shmemfs and that our cache domain tracking remains valid.
3183 	 * For example, if the obj->filp was moved to swap without us
3184 	 * being notified and releasing the pages, we would mistakenly
3185 	 * continue to assume that the obj remained out of the CPU cached
3186 	 * domain.
3187 	 */
3188 	ret = i915_gem_object_pin_pages(obj);
3189 	if (ret)
3190 		return ret;
3191 
3192 	i915_gem_object_flush_cpu_write_domain(obj);
3193 
3194 	/* Serialise direct access to this object with the barriers for
3195 	 * coherent writes from the GPU, by effectively invalidating the
3196 	 * GTT domain upon first access.
3197 	 */
3198 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3199 		mb();
3200 
3201 	old_write_domain = obj->base.write_domain;
3202 	old_read_domains = obj->base.read_domains;
3203 
3204 	/* It should now be out of any other write domains, and we can update
3205 	 * the domain values for our changes.
3206 	 */
3207 	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3208 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3209 	if (write) {
3210 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3211 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3212 		obj->mm.dirty = true;
3213 	}
3214 
3215 	trace_i915_gem_object_change_domain(obj,
3216 					    old_read_domains,
3217 					    old_write_domain);
3218 
3219 	i915_gem_object_unpin_pages(obj);
3220 	return 0;
3221 }
3222 
3223 /**
3224  * Changes the cache-level of an object across all VMA.
3225  * @obj: object to act on
3226  * @cache_level: new cache level to set for the object
3227  *
3228  * After this function returns, the object will be in the new cache-level
3229  * across all GTT and the contents of the backing storage will be coherent
3230  * with respect to the new cache-level. In order to keep the backing storage
3231  * coherent for all users, we only allow a single cache level to be set
3232  * globally on the object and prevent it from being changed whilst the
3233  * hardware is reading from the object. That is, if the object is currently
3234  * on the scanout it will be set to uncached (or equivalent display
3235  * cache coherency) and all non-MOCS GPU access will also be uncached so
3236  * that all direct access to the scanout remains coherent.
3237  */
3238 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3239 				    enum i915_cache_level cache_level)
3240 {
3241 	struct i915_vma *vma;
3242 	int ret;
3243 
3244 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3245 
3246 	if (obj->cache_level == cache_level)
3247 		return 0;
3248 
3249 	/* Inspect the list of currently bound VMA and unbind any that would
3250 	 * be invalid given the new cache-level. This is principally to
3251 	 * catch the issue of the CS prefetch crossing page boundaries and
3252 	 * reading an invalid PTE on older architectures.
3253 	 */
3254 restart:
3255 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3256 		if (!drm_mm_node_allocated(&vma->node))
3257 			continue;
3258 
3259 		if (i915_vma_is_pinned(vma)) {
3260 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3261 			return -EBUSY;
3262 		}
3263 
3264 		if (i915_gem_valid_gtt_space(vma, cache_level))
3265 			continue;
3266 
3267 		ret = i915_vma_unbind(vma);
3268 		if (ret)
3269 			return ret;
3270 
3271 		/* As unbinding may affect other elements in the
3272 		 * obj->vma_list (due to side-effects from retiring
3273 		 * an active vma), play safe and restart the iterator.
3274 		 */
3275 		goto restart;
3276 	}
3277 
3278 	/* We can reuse the existing drm_mm nodes but need to change the
3279 	 * cache-level on the PTE. We could simply unbind them all and
3280 	 * rebind with the correct cache-level on next use. However since
3281 	 * we already have a valid slot, dma mapping, pages etc, we may as well
3282 	 * rewrite the PTE in the belief that doing so tramples upon less
3283 	 * state and so involves less work.
3284 	 */
3285 	if (obj->bind_count) {
3286 		/* Before we change the PTE, the GPU must not be accessing it.
3287 		 * If we wait upon the object, we know that all the bound
3288 		 * VMA are no longer active.
3289 		 */
3290 		ret = i915_gem_object_wait(obj,
3291 					   I915_WAIT_INTERRUPTIBLE |
3292 					   I915_WAIT_LOCKED |
3293 					   I915_WAIT_ALL,
3294 					   MAX_SCHEDULE_TIMEOUT,
3295 					   NULL);
3296 		if (ret)
3297 			return ret;
3298 
3299 		if (!HAS_LLC(to_i915(obj->base.dev)) &&
3300 		    cache_level != I915_CACHE_NONE) {
3301 			/* Access to snoopable pages through the GTT is
3302 			 * incoherent and on some machines causes a hard
3303 			 * lockup. Relinquish the CPU mmap to force
3304 			 * userspace to refault in the pages and we can
3305 			 * then double check if the GTT mapping is still
3306 			 * valid for that pointer access.
3307 			 */
3308 			i915_gem_release_mmap(obj);
3309 
3310 			/* As we no longer need a fence for GTT access,
3311 			 * we can relinquish it now (and so prevent having
3312 			 * to steal a fence from someone else on the next
3313 			 * fence request). Note GPU activity would have
3314 			 * dropped the fence as all snoopable access is
3315 			 * supposed to be linear.
3316 			 */
3317 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
3318 				ret = i915_vma_put_fence(vma);
3319 				if (ret)
3320 					return ret;
3321 			}
3322 		} else {
3323 			/* We either have incoherent backing store and
3324 			 * so no GTT access or the architecture is fully
3325 			 * coherent. In such cases, existing GTT mmaps
3326 			 * ignore the cache bit in the PTE and we can
3327 			 * rewrite it without confusing the GPU or having
3328 			 * to force userspace to fault back in its mmaps.
3329 			 */
3330 		}
3331 
3332 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3333 			if (!drm_mm_node_allocated(&vma->node))
3334 				continue;
3335 
3336 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3337 			if (ret)
3338 				return ret;
3339 		}
3340 	}
3341 
3342 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
3343 	    cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
3344 		obj->cache_dirty = true;
3345 
3346 	list_for_each_entry(vma, &obj->vma_list, obj_link)
3347 		vma->node.color = cache_level;
3348 	obj->cache_level = cache_level;
3349 
3350 	return 0;
3351 }
3352 
3353 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3354 			       struct drm_file *file)
3355 {
3356 	struct drm_i915_gem_caching *args = data;
3357 	struct drm_i915_gem_object *obj;
3358 	int err = 0;
3359 
3360 	rcu_read_lock();
3361 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3362 	if (!obj) {
3363 		err = -ENOENT;
3364 		goto out;
3365 	}
3366 
3367 	switch (obj->cache_level) {
3368 	case I915_CACHE_LLC:
3369 	case I915_CACHE_L3_LLC:
3370 		args->caching = I915_CACHING_CACHED;
3371 		break;
3372 
3373 	case I915_CACHE_WT:
3374 		args->caching = I915_CACHING_DISPLAY;
3375 		break;
3376 
3377 	default:
3378 		args->caching = I915_CACHING_NONE;
3379 		break;
3380 	}
3381 out:
3382 	rcu_read_unlock();
3383 	return err;
3384 }
3385 
3386 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3387 			       struct drm_file *file)
3388 {
3389 	struct drm_i915_private *i915 = to_i915(dev);
3390 	struct drm_i915_gem_caching *args = data;
3391 	struct drm_i915_gem_object *obj;
3392 	enum i915_cache_level level;
3393 	int ret;
3394 
3395 	switch (args->caching) {
3396 	case I915_CACHING_NONE:
3397 		level = I915_CACHE_NONE;
3398 		break;
3399 	case I915_CACHING_CACHED:
3400 		/*
3401 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3402 		 * snooped mapping may leave stale data in a corresponding CPU
3403 		 * cacheline, whereas normally such cachelines would get
3404 		 * invalidated.
3405 		 */
3406 		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3407 			return -ENODEV;
3408 
3409 		level = I915_CACHE_LLC;
3410 		break;
3411 	case I915_CACHING_DISPLAY:
3412 		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3413 		break;
3414 	default:
3415 		return -EINVAL;
3416 	}
3417 
3418 	ret = i915_mutex_lock_interruptible(dev);
3419 	if (ret)
3420 		return ret;
3421 
3422 	obj = i915_gem_object_lookup(file, args->handle);
3423 	if (!obj) {
3424 		ret = -ENOENT;
3425 		goto unlock;
3426 	}
3427 
3428 	ret = i915_gem_object_set_cache_level(obj, level);
3429 	i915_gem_object_put(obj);
3430 unlock:
3431 	mutex_unlock(&dev->struct_mutex);
3432 	return ret;
3433 }
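/*
 * Illustrative sketch (not part of the driver) of the userspace side of the
 * caching ioctls above:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */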
3434 
3435 /*
3436  * Prepare buffer for display plane (scanout, cursors, etc).
3437  * Can be called from an uninterruptible phase (modesetting) and allows
3438  * any flushes to be pipelined (for pageflips).
3439  */
3440 struct i915_vma *
3441 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3442 				     u32 alignment,
3443 				     const struct i915_ggtt_view *view)
3444 {
3445 	struct i915_vma *vma;
3446 	u32 old_read_domains, old_write_domain;
3447 	int ret;
3448 
3449 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3450 
3451 	/* Mark the pin_display early so that we account for the
3452 	 * display coherency whilst setting up the cache domains.
3453 	 */
3454 	obj->pin_display++;
3455 
3456 	/* The display engine is not coherent with the LLC cache on gen6.  As
3457 	 * a result, we make sure that the pinning that is about to occur is
3458 	 * done with uncached PTEs. This is the lowest common denominator for all
3459 	 * chipsets.
3460 	 *
3461 	 * However for gen6+, we could do better by using the GFDT bit instead
3462 	 * of uncaching, which would allow us to flush all the LLC-cached data
3463 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3464 	 */
3465 	ret = i915_gem_object_set_cache_level(obj,
3466 					      HAS_WT(to_i915(obj->base.dev)) ?
3467 					      I915_CACHE_WT : I915_CACHE_NONE);
3468 	if (ret) {
3469 		vma = ERR_PTR(ret);
3470 		goto err_unpin_display;
3471 	}
3472 
3473 	/* As the user may map the buffer once pinned in the display plane
3474 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3475 	 * always use map_and_fenceable for all scanout buffers. However,
3476 	 * it may simply be too big to fit into the mappable aperture, in which case
3477 	 * put it anyway and hope that userspace can cope (but always first
3478 	 * try to preserve the existing ABI).
3479 	 */
3480 	vma = ERR_PTR(-ENOSPC);
3481 	if (view->type == I915_GGTT_VIEW_NORMAL)
3482 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3483 					       PIN_MAPPABLE | PIN_NONBLOCK);
3484 	if (IS_ERR(vma)) {
3485 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
3486 		unsigned int flags;
3487 
3488 		/* Valleyview is definitely limited to scanning out the first
3489 		 * 512MiB. Let's presume this behaviour was inherited from the
3490 		 * g4x display engine and that all earlier gen are similarly
3491 		 * limited. Testing suggests that it is a little more
3492 		 * complicated than this. For example, Cherryview appears quite
3493 		 * happy to scanout from anywhere within its global aperture.
3494 		 */
3495 		flags = 0;
3496 		if (HAS_GMCH_DISPLAY(i915))
3497 			flags = PIN_MAPPABLE;
3498 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3499 	}
3500 	if (IS_ERR(vma))
3501 		goto err_unpin_display;
3502 
3503 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3504 
3505 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
3506 	if (obj->cache_dirty) {
3507 		i915_gem_clflush_object(obj, true);
3508 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
3509 	}
3510 
3511 	old_write_domain = obj->base.write_domain;
3512 	old_read_domains = obj->base.read_domains;
3513 
3514 	/* It should now be out of any other write domains, and we can update
3515 	 * the domain values for our changes.
3516 	 */
3517 	obj->base.write_domain = 0;
3518 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3519 
3520 	trace_i915_gem_object_change_domain(obj,
3521 					    old_read_domains,
3522 					    old_write_domain);
3523 
3524 	return vma;
3525 
3526 err_unpin_display:
3527 	obj->pin_display--;
3528 	return vma;
3529 }
3530 
3531 void
3532 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3533 {
3534 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
3535 
3536 	if (WARN_ON(vma->obj->pin_display == 0))
3537 		return;
3538 
3539 	if (--vma->obj->pin_display == 0)
3540 		vma->display_alignment = 0;
3541 
3542 	/* Bump the LRU to try to avoid premature eviction whilst flipping */
3543 	if (!i915_vma_is_active(vma))
3544 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3545 
3546 	i915_vma_unpin(vma);
3547 }
3548 
3549 /**
3550  * Moves a single object to the CPU read, and possibly write domain.
3551  * @obj: object to act on
3552  * @write: requesting write or read-only access
3553  *
3554  * This function returns when the move is complete, including waiting on
3555  * flushes to occur.
3556  */
3557 int
3558 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3559 {
3560 	uint32_t old_write_domain, old_read_domains;
3561 	int ret;
3562 
3563 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3564 
3565 	ret = i915_gem_object_wait(obj,
3566 				   I915_WAIT_INTERRUPTIBLE |
3567 				   I915_WAIT_LOCKED |
3568 				   (write ? I915_WAIT_ALL : 0),
3569 				   MAX_SCHEDULE_TIMEOUT,
3570 				   NULL);
3571 	if (ret)
3572 		return ret;
3573 
3574 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3575 		return 0;
3576 
3577 	i915_gem_object_flush_gtt_write_domain(obj);
3578 
3579 	old_write_domain = obj->base.write_domain;
3580 	old_read_domains = obj->base.read_domains;
3581 
3582 	/* Flush the CPU cache if it's still invalid. */
3583 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3584 		i915_gem_clflush_object(obj, false);
3585 
3586 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3587 	}
3588 
3589 	/* It should now be out of any other write domains, and we can update
3590 	 * the domain values for our changes.
3591 	 */
3592 	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3593 
3594 	/* If we're writing through the CPU, then the GPU read domains will
3595 	 * need to be invalidated at next use.
3596 	 */
3597 	if (write) {
3598 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3599 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3600 	}
3601 
3602 	trace_i915_gem_object_change_domain(obj,
3603 					    old_read_domains,
3604 					    old_write_domain);
3605 
3606 	return 0;
3607 }
3608 
3609 /* Throttle our rendering by waiting until the ring has completed our requests
3610  * emitted over 20 msec ago.
3611  *
3612  * Note that if we were to use the current jiffies each time around the loop,
3613  * we wouldn't escape the function with any frames outstanding if the time to
3614  * render a frame was over 20ms.
3615  *
3616  * This should get us reasonable parallelism between CPU and GPU but also
3617  * relatively low latency when blocking on a particular request to finish.
3618  */
3619 static int
3620 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3621 {
3622 	struct drm_i915_private *dev_priv = to_i915(dev);
3623 	struct drm_i915_file_private *file_priv = file->driver_priv;
3624 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3625 	struct drm_i915_gem_request *request, *target = NULL;
3626 	long ret;
3627 
3628 	/* ABI: return -EIO if already wedged */
3629 	if (i915_terminally_wedged(&dev_priv->gpu_error))
3630 		return -EIO;
3631 
3632 	spin_lock(&file_priv->mm.lock);
3633 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3634 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3635 			break;
3636 
3637 		/*
3638 		 * Note that the request might not have been submitted yet,
3639 		 * in which case emitted_jiffies will be zero.
3640 		 */
3641 		if (!request->emitted_jiffies)
3642 			continue;
3643 
3644 		target = request;
3645 	}
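	/* Take a reference to the selected request before dropping the
	 * spinlock, so that it cannot be retired and freed while we wait
	 * on it below.
	 */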
3646 	if (target)
3647 		i915_gem_request_get(target);
3648 	spin_unlock(&file_priv->mm.lock);
3649 
3650 	if (target == NULL)
3651 		return 0;
3652 
3653 	ret = i915_wait_request(target,
3654 				I915_WAIT_INTERRUPTIBLE,
3655 				MAX_SCHEDULE_TIMEOUT);
3656 	i915_gem_request_put(target);
3657 
3658 	return ret < 0 ? ret : 0;
3659 }
3660 
3661 struct i915_vma *
3662 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3663 			 const struct i915_ggtt_view *view,
3664 			 u64 size,
3665 			 u64 alignment,
3666 			 u64 flags)
3667 {
3668 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3669 	struct i915_address_space *vm = &dev_priv->ggtt.base;
3670 	struct i915_vma *vma;
3671 	int ret;
3672 
3673 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3674 
3675 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
3676 	if (IS_ERR(vma))
3677 		return vma;
3678 
3679 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
3680 		if (flags & PIN_NONBLOCK &&
3681 		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3682 			return ERR_PTR(-ENOSPC);
3683 
3684 		if (flags & PIN_MAPPABLE) {
3685 			u32 fence_size;
3686 
3687 			fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
3688 							    i915_gem_object_get_tiling(obj));
3689 			/* If the required space is larger than the available
3690 			 * aperture, we will not be able to find a slot for the
3691 			 * object and unbinding the object now will be in
3692 			 * vain. Worse, doing so may cause us to ping-pong
3693 			 * the object in and out of the Global GTT and
3694 			 * waste a lot of cycles under the mutex.
3695 			 */
3696 			if (fence_size > dev_priv->ggtt.mappable_end)
3697 				return ERR_PTR(-E2BIG);
3698 
3699 			/* If NONBLOCK is set the caller is optimistically
3700 			 * trying to cache the full object within the mappable
3701 			 * aperture, and *must* have a fallback in place for
3702 			 * situations where we cannot bind the object. We
3703 			 * can be a little more lax here and use the fallback
3704 			 * more often to avoid costly migrations of ourselves
3705 			 * and other objects within the aperture.
3706 			 *
3707 			 * Half-the-aperture is used as a simple heuristic.
3708 			 * More interesting would be to search for a free
3709 			 * block prior to making the commitment to unbind.
3710 			 * That caters for the self-harm case, and with a
3711 			 * little more heuristics (e.g. NOFAULT, NOEVICT)
3712 			 * we could try to minimise harm to others.
3713 			 */
3714 			if (flags & PIN_NONBLOCK &&
3715 			    fence_size > dev_priv->ggtt.mappable_end / 2)
3716 				return ERR_PTR(-ENOSPC);
3717 		}
3718 
3719 		WARN(i915_vma_is_pinned(vma),
3720 		     "bo is already pinned in ggtt with incorrect alignment:"
3721 		     " offset=%08x, req.alignment=%llx,"
3722 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3723 		     i915_ggtt_offset(vma), alignment,
3724 		     !!(flags & PIN_MAPPABLE),
3725 		     i915_vma_is_map_and_fenceable(vma));
3726 		ret = i915_vma_unbind(vma);
3727 		if (ret)
3728 			return ERR_PTR(ret);
3729 	}
3730 
3731 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3732 	if (ret)
3733 		return ERR_PTR(ret);
3734 
3735 	return vma;
3736 }
3737 
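/* The busy ioctl reports activity as a bitmask: the engine last writing to
 * the object (if still busy) is encoded in the low 16 bits, and every
 * engine still reading from it sets one flag in the high 16 bits, e.g.
 *	writer  = args->busy & 0xffff;
 *	readers = args->busy >> 16;
 * (see __busy_read_flag() and __busy_write_id() below).
 */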
3738 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3739 {
3740 	/* Note that we could alias engines in the execbuf API, but
3741 	 * that would be very unwise as it prevents userspace from
3742 	 * exercising fine control over engine selection. Ahem.
3743 	 *
3744 	 * This should be something like EXEC_MAX_ENGINE instead of
3745 	 * I915_NUM_ENGINES.
3746 	 */
3747 	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3748 	return 0x10000 << id;
3749 }
3750 
3751 static __always_inline unsigned int __busy_write_id(unsigned int id)
3752 {
3753 	/* The uABI guarantees an active writer is also amongst the read
3754 	 * engines. This would be true if we accessed the activity tracking
3755 	 * under the lock, but as we perform the lookup of the object and
3756 	 * its activity locklessly we cannot guarantee that the last_write
3757 	 * being active implies that we have set the same engine flag from
3758 	 * last_read - hence we always set both read and write busy for
3759 	 * last_write.
3760 	 */
3761 	return id | __busy_read_flag(id);
3762 }
3763 
3764 static __always_inline unsigned int
3765 __busy_set_if_active(const struct dma_fence *fence,
3766 		     unsigned int (*flag)(unsigned int id))
3767 {
3768 	struct drm_i915_gem_request *rq;
3769 
3770 	/* We have to check the current hw status of the fence as the uABI
3771 	 * guarantees forward progress. We could rely on the idle worker
3772 	 * to eventually flush us, but to minimise latency just ask the
3773 	 * hardware.
3774 	 *
3775 	 * Note we only report on the status of native fences.
3776 	 */
3777 	if (!dma_fence_is_i915(fence))
3778 		return 0;
3779 
3780 	/* opencode to_request() in order to avoid const warnings */
3781 	rq = container_of(fence, struct drm_i915_gem_request, fence);
3782 	if (i915_gem_request_completed(rq))
3783 		return 0;
3784 
3785 	return flag(rq->engine->exec_id);
3786 }
3787 
3788 static __always_inline unsigned int
3789 busy_check_reader(const struct dma_fence *fence)
3790 {
3791 	return __busy_set_if_active(fence, __busy_read_flag);
3792 }
3793 
3794 static __always_inline unsigned int
3795 busy_check_writer(const struct dma_fence *fence)
3796 {
3797 	if (!fence)
3798 		return 0;
3799 
3800 	return __busy_set_if_active(fence, __busy_write_id);
3801 }
3802 
3803 int
3804 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3805 		    struct drm_file *file)
3806 {
3807 	struct drm_i915_gem_busy *args = data;
3808 	struct drm_i915_gem_object *obj;
3809 	struct reservation_object_list *list;
3810 	unsigned int seq;
3811 	int err;
3812 
3813 	err = -ENOENT;
3814 	rcu_read_lock();
3815 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3816 	if (!obj)
3817 		goto out;
3818 
3819 	/* A discrepancy here is that we do not report the status of
3820 	 * non-i915 fences, i.e. even though we may report the object as idle,
3821 	 * a call to set-domain may still stall waiting for foreign rendering.
3822 	 * This also means that wait-ioctl may report an object as busy,
3823 	 * where busy-ioctl considers it idle.
3824 	 *
3825 	 * We trade the ability to warn of foreign fences to report on which
3826 	 * i915 engines are active for the object.
3827 	 *
3828 	 * Alternatively, we can trade that extra information on read/write
3829 	 * activity with
3830 	 *	args->busy =
3831 	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
3832 	 * to report the overall busyness. This is what the wait-ioctl does.
3834 	 */
3835 retry:
3836 	seq = raw_read_seqcount(&obj->resv->seq);
3837 
3838 	/* Translate the exclusive fence to the READ *and* WRITE engine */
3839 	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3840 
3841 	/* Translate shared fences to READ set of engines */
3842 	list = rcu_dereference(obj->resv->fence);
3843 	if (list) {
3844 		unsigned int shared_count = list->shared_count, i;
3845 
3846 		for (i = 0; i < shared_count; ++i) {
3847 			struct dma_fence *fence =
3848 				rcu_dereference(list->shared[i]);
3849 
3850 			args->busy |= busy_check_reader(fence);
3851 		}
3852 	}
3853 
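	/* If the fences changed while we were sampling them and we claimed
	 * the object was busy, resample so that the read/write engine flags
	 * reflect a single consistent snapshot. An idle result is returned
	 * as-is; the report is inherently racy by the time it reaches
	 * userspace anyway.
	 */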
3854 	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3855 		goto retry;
3856 
3857 	err = 0;
3858 out:
3859 	rcu_read_unlock();
3860 	return err;
3861 }
3862 
3863 int
3864 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3865 			struct drm_file *file_priv)
3866 {
3867 	return i915_gem_ring_throttle(dev, file_priv);
3868 }
3869 
3870 int
3871 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3872 		       struct drm_file *file_priv)
3873 {
3874 	struct drm_i915_private *dev_priv = to_i915(dev);
3875 	struct drm_i915_gem_madvise *args = data;
3876 	struct drm_i915_gem_object *obj;
3877 	int err;
3878 
3879 	switch (args->madv) {
3880 	case I915_MADV_DONTNEED:
3881 	case I915_MADV_WILLNEED:
3882 		break;
3883 	default:
3884 		return -EINVAL;
3885 	}
3886 
3887 	obj = i915_gem_object_lookup(file_priv, args->handle);
3888 	if (!obj)
3889 		return -ENOENT;
3890 
3891 	err = mutex_lock_interruptible(&obj->mm.lock);
3892 	if (err)
3893 		goto out;
3894 
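	/* On platforms that have to keep the pages of swizzled (tiled)
	 * objects pinned (QUIRK_PIN_SWIZZLED_PAGES), that pin is only held
	 * while the object is marked WILLNEED: drop the pin matching the
	 * old state and, if the new state is WILLNEED, take it again.
	 */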
3895 	if (obj->mm.pages &&
3896 	    i915_gem_object_is_tiled(obj) &&
3897 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3898 		if (obj->mm.madv == I915_MADV_WILLNEED) {
3899 			GEM_BUG_ON(!obj->mm.quirked);
3900 			__i915_gem_object_unpin_pages(obj);
3901 			obj->mm.quirked = false;
3902 		}
3903 		if (args->madv == I915_MADV_WILLNEED) {
3904 			GEM_BUG_ON(obj->mm.quirked);
3905 			__i915_gem_object_pin_pages(obj);
3906 			obj->mm.quirked = true;
3907 		}
3908 	}
3909 
3910 	if (obj->mm.madv != __I915_MADV_PURGED)
3911 		obj->mm.madv = args->madv;
3912 
3913 	/* if the object is no longer attached, discard its backing storage */
3914 	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
3915 		i915_gem_object_truncate(obj);
3916 
3917 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
3918 	mutex_unlock(&obj->mm.lock);
3919 
3920 out:
3921 	i915_gem_object_put(obj);
3922 	return err;
3923 }
3924 
3925 static void
3926 frontbuffer_retire(struct i915_gem_active *active,
3927 		   struct drm_i915_gem_request *request)
3928 {
3929 	struct drm_i915_gem_object *obj =
3930 		container_of(active, typeof(*obj), frontbuffer_write);
3931 
3932 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
3933 }
3934 
3935 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3936 			  const struct drm_i915_gem_object_ops *ops)
3937 {
3938 	mutex_init(&obj->mm.lock);
3939 
3940 	INIT_LIST_HEAD(&obj->global_link);
3941 	INIT_LIST_HEAD(&obj->userfault_link);
3942 	INIT_LIST_HEAD(&obj->obj_exec_link);
3943 	INIT_LIST_HEAD(&obj->vma_list);
3944 	INIT_LIST_HEAD(&obj->batch_pool_link);
3945 
3946 	obj->ops = ops;
3947 
3948 	reservation_object_init(&obj->__builtin_resv);
3949 	obj->resv = &obj->__builtin_resv;
3950 
3951 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3952 	init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
3953 
3954 	obj->mm.madv = I915_MADV_WILLNEED;
3955 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
3956 	mutex_init(&obj->mm.get_page.lock);
3957 
3958 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3959 }
3960 
3961 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3962 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
3963 		 I915_GEM_OBJECT_IS_SHRINKABLE,
3964 	.get_pages = i915_gem_object_get_pages_gtt,
3965 	.put_pages = i915_gem_object_put_pages_gtt,
3966 };
3967 
3968 /* Note we don't consider signbits :| */
3969 #define overflows_type(x, T) \
3970 	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
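/* e.g. overflows_type(size, obj->base.size) is non-zero when the u64 size
 * has bits set above the width of obj->base.size and would therefore be
 * truncated on assignment.
 */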
3971 
3972 struct drm_i915_gem_object *
3973 i915_gem_object_create(struct drm_device *dev, u64 size)
3974 {
3975 	struct drm_i915_private *dev_priv = to_i915(dev);
3976 	struct drm_i915_gem_object *obj;
3977 	struct address_space *mapping;
3978 	gfp_t mask;
3979 	int ret;
3980 
3981 	/* There is a prevalence of the assumption that we fit the object's
3982 	 * page count inside a 32bit _signed_ variable. Let's document this and
3983 	 * catch if we ever need to fix it. In the meantime, if you do spot
3984 	 * such a local variable, please consider fixing!
3985 	 */
3986 	if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
3987 		return ERR_PTR(-E2BIG);
3988 
3989 	if (overflows_type(size, obj->base.size))
3990 		return ERR_PTR(-E2BIG);
3991 
3992 	obj = i915_gem_object_alloc(dev);
3993 	if (obj == NULL)
3994 		return ERR_PTR(-ENOMEM);
3995 
3996 	ret = drm_gem_object_init(dev, &obj->base, size);
3997 	if (ret)
3998 		goto fail;
3999 
4000 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4001 	if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
4002 		/* 965gm cannot relocate objects above 4GiB. */
4003 		mask &= ~__GFP_HIGHMEM;
4004 		mask |= __GFP_DMA32;
4005 	}
4006 
4007 	mapping = obj->base.filp->f_mapping;
4008 	mapping_set_gfp_mask(mapping, mask);
4009 
4010 	i915_gem_object_init(obj, &i915_gem_object_ops);
4011 
4012 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4013 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4014 
4015 	if (HAS_LLC(dev_priv)) {
4016 		/* On some devices, we can have the GPU use the LLC (the CPU
4017 		 * cache) for about a 10% performance improvement
4018 		 * compared to uncached.  Graphics requests other than
4019 		 * display scanout are coherent with the CPU in
4020 		 * accessing this cache.  This means in this mode we
4021 		 * don't need to clflush on the CPU side, and on the
4022 		 * GPU side we only need to flush internal caches to
4023 		 * get data visible to the CPU.
4024 		 *
4025 		 * However, we maintain the display planes as UC, and so
4026 		 * need to rebind when first used as such.
4027 		 */
4028 		obj->cache_level = I915_CACHE_LLC;
4029 	} else
4030 		obj->cache_level = I915_CACHE_NONE;
4031 
4032 	trace_i915_gem_object_create(obj);
4033 
4034 	return obj;
4035 
4036 fail:
4037 	i915_gem_object_free(obj);
4038 	return ERR_PTR(ret);
4039 }
4040 
4041 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4042 {
4043 	/* If we are the last user of the backing storage (be it shmemfs
4044 	 * pages or stolen etc), we know that the pages are going to be
4045 	 * immediately released. In this case, we can then skip copying
4046 	 * back the contents from the GPU.
4047 	 */
4048 
4049 	if (obj->mm.madv != I915_MADV_WILLNEED)
4050 		return false;
4051 
4052 	if (obj->base.filp == NULL)
4053 		return true;
4054 
4055 	/* At first glance, this looks racy, but then again so would be
4056 	 * userspace racing mmap against close. However, the first external
4057 	 * reference to the filp can only be obtained through the
4058 	 * i915_gem_mmap_ioctl() which safeguards us against the user
4059 	 * acquiring such a reference whilst we are in the middle of
4060 	 * freeing the object.
4061 	 */
4062 	return atomic_long_read(&obj->base.filp->f_count) == 1;
4063 }
4064 
4065 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4066 				    struct llist_node *freed)
4067 {
4068 	struct drm_i915_gem_object *obj, *on;
4069 
4070 	mutex_lock(&i915->drm.struct_mutex);
4071 	intel_runtime_pm_get(i915);
4072 	llist_for_each_entry(obj, freed, freed) {
4073 		struct i915_vma *vma, *vn;
4074 
4075 		trace_i915_gem_object_destroy(obj);
4076 
4077 		GEM_BUG_ON(i915_gem_object_is_active(obj));
4078 		list_for_each_entry_safe(vma, vn,
4079 					 &obj->vma_list, obj_link) {
4080 			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4081 			GEM_BUG_ON(i915_vma_is_active(vma));
4082 			vma->flags &= ~I915_VMA_PIN_MASK;
4083 			i915_vma_close(vma);
4084 		}
4085 		GEM_BUG_ON(!list_empty(&obj->vma_list));
4086 		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4087 
4088 		list_del(&obj->global_link);
4089 	}
4090 	intel_runtime_pm_put(i915);
4091 	mutex_unlock(&i915->drm.struct_mutex);
4092 
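	/* Second pass, now that the VMA are closed and struct_mutex has
	 * been dropped: release the pages and backing storage, then free
	 * the object itself.
	 */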
4093 	llist_for_each_entry_safe(obj, on, freed, freed) {
4094 		GEM_BUG_ON(obj->bind_count);
4095 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4096 
4097 		if (obj->ops->release)
4098 			obj->ops->release(obj);
4099 
4100 		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4101 			atomic_set(&obj->mm.pages_pin_count, 0);
4102 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4103 		GEM_BUG_ON(obj->mm.pages);
4104 
4105 		if (obj->base.import_attach)
4106 			drm_prime_gem_destroy(&obj->base, NULL);
4107 
4108 		reservation_object_fini(&obj->__builtin_resv);
4109 		drm_gem_object_release(&obj->base);
4110 		i915_gem_info_remove_obj(i915, obj->base.size);
4111 
4112 		kfree(obj->bit_17);
4113 		i915_gem_object_free(obj);
4114 	}
4115 }
4116 
4117 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4118 {
4119 	struct llist_node *freed;
4120 
4121 	freed = llist_del_all(&i915->mm.free_list);
4122 	if (unlikely(freed))
4123 		__i915_gem_free_objects(i915, freed);
4124 }
4125 
4126 static void __i915_gem_free_work(struct work_struct *work)
4127 {
4128 	struct drm_i915_private *i915 =
4129 		container_of(work, struct drm_i915_private, mm.free_work);
4130 	struct llist_node *freed;
4131 
4132 	/* All file-owned VMA should have been released by this point through
4133 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4134 	 * However, the object may also be bound into the global GTT (e.g.
4135 	 * older GPUs without per-process support, or for direct access through
4136 	 * the GTT either for the user or for scanout). Those VMA still need to
4137 	 * be unbound now.
4138 	 */
4139 
4140 	while ((freed = llist_del_all(&i915->mm.free_list)))
4141 		__i915_gem_free_objects(i915, freed);
4142 }
4143 
4144 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4145 {
4146 	struct drm_i915_gem_object *obj =
4147 		container_of(head, typeof(*obj), rcu);
4148 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
4149 
4150 	/* We can't simply use call_rcu() from i915_gem_free_object()
4151 	 * as we need to block whilst unbinding, and the call_rcu
4152 	 * task may be called from softirq context. So we take a
4153 	 * detour through a worker.
4154 	 */
4155 	if (llist_add(&obj->freed, &i915->mm.free_list))
4156 		schedule_work(&i915->mm.free_work);
4157 }
4158 
4159 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4160 {
4161 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4162 
4163 	if (obj->mm.quirked)
4164 		__i915_gem_object_unpin_pages(obj);
4165 
4166 	if (discard_backing_storage(obj))
4167 		obj->mm.madv = I915_MADV_DONTNEED;
4168 
4169 	/* Before we free the object, make sure any pure RCU-only
4170 	 * read-side critical sections are complete, e.g.
4171 	 * i915_gem_busy_ioctl(). For the corresponding synchronized
4172 	 * lookup see i915_gem_object_lookup_rcu().
4173 	 */
4174 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4175 }
4176 
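/* Release a reference to the object. If the object is still active on the
 * GPU, the put is deferred by converting it into an "active reference"
 * that is dropped when the object finally retires; otherwise the
 * reference is released immediately.
 */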
4177 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4178 {
4179 	lockdep_assert_held(&obj->base.dev->struct_mutex);
4180 
4181 	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
4182 	if (i915_gem_object_is_active(obj))
4183 		i915_gem_object_set_active_reference(obj);
4184 	else
4185 		i915_gem_object_put(obj);
4186 }
4187 
4188 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
4189 {
4190 	struct intel_engine_cs *engine;
4191 	enum intel_engine_id id;
4192 
4193 	for_each_engine(engine, dev_priv, id)
4194 		GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
4195 }
4196 
4197 int i915_gem_suspend(struct drm_device *dev)
4198 {
4199 	struct drm_i915_private *dev_priv = to_i915(dev);
4200 	int ret;
4201 
4202 	intel_suspend_gt_powersave(dev_priv);
4203 
4204 	mutex_lock(&dev->struct_mutex);
4205 
4206 	/* We have to flush all the executing contexts to main memory so
4207 	 * that they can be saved in the hibernation image. To ensure the last
4208 	 * context image is coherent, we have to switch away from it. That
4209 	 * leaves the dev_priv->kernel_context still active when
4210 	 * we actually suspend, and its image in memory may not match the GPU
4211 	 * state. Fortunately, the kernel_context is disposable and we do
4212 	 * not rely on its state.
4213 	 */
4214 	ret = i915_gem_switch_to_kernel_context(dev_priv);
4215 	if (ret)
4216 		goto err;
4217 
4218 	ret = i915_gem_wait_for_idle(dev_priv,
4219 				     I915_WAIT_INTERRUPTIBLE |
4220 				     I915_WAIT_LOCKED);
4221 	if (ret)
4222 		goto err;
4223 
4224 	i915_gem_retire_requests(dev_priv);
4225 	GEM_BUG_ON(dev_priv->gt.active_requests);
4226 
4227 	assert_kernel_context_is_current(dev_priv);
4228 	i915_gem_context_lost(dev_priv);
4229 	mutex_unlock(&dev->struct_mutex);
4230 
4231 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4232 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4233 	flush_delayed_work(&dev_priv->gt.idle_work);
4234 	flush_work(&dev_priv->mm.free_work);
4235 
4236 	/* Assert that we successfully flushed all the work and
4237 	 * reset the GPU back to its idle, low power state.
4238 	 */
4239 	WARN_ON(dev_priv->gt.awake);
4240 	WARN_ON(!intel_execlists_idle(dev_priv));
4241 
4242 	/*
4243 	 * Neither the BIOS, ourselves, nor any other kernel
4244 	 * expects the system to be in execlists mode on startup,
4245 	 * so we need to reset the GPU back to legacy mode. And the only
4246 	 * known way to disable logical contexts is through a GPU reset.
4247 	 *
4248 	 * So in order to leave the system in a known default configuration,
4249 	 * always reset the GPU upon unload and suspend. Afterwards we then
4250 	 * clean up the GEM state tracking, flushing off the requests and
4251 	 * leaving the system in a known idle state.
4252 	 *
4253 	 * Note that it is of the utmost importance that the GPU is idle and
4254 	 * all stray writes are flushed *before* we dismantle the backing
4255 	 * storage for the pinned objects.
4256 	 *
4257 	 * However, since we are uncertain that resetting the GPU on older
4258 	 * machines is a good idea, we don't - just in case it leaves the
4259 	 * machine in an unusable condition.
4260 	 */
4261 	if (HAS_HW_CONTEXTS(dev_priv)) {
4262 		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4263 		WARN_ON(reset && reset != -ENODEV);
4264 	}
4265 
4266 	return 0;
4267 
4268 err:
4269 	mutex_unlock(&dev->struct_mutex);
4270 	return ret;
4271 }
4272 
4273 void i915_gem_resume(struct drm_device *dev)
4274 {
4275 	struct drm_i915_private *dev_priv = to_i915(dev);
4276 
4277 	WARN_ON(dev_priv->gt.awake);
4278 
4279 	mutex_lock(&dev->struct_mutex);
4280 	i915_gem_restore_gtt_mappings(dev_priv);
4281 
4282 	/* As we didn't flush the kernel context before suspend, we cannot
4283 	 * guarantee that the context image is complete. So let's just reset
4284 	 * it and start again.
4285 	 */
4286 	dev_priv->gt.resume(dev_priv);
4287 
4288 	mutex_unlock(&dev->struct_mutex);
4289 }
4290 
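/* Enable hardware swizzling of tiled surfaces to match the bit-6 swizzle
 * pattern the driver detected; a no-op when swizzling is not in use or the
 * platform (gen < 5) has no such control.
 */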
4291 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4292 {
4293 	if (INTEL_GEN(dev_priv) < 5 ||
4294 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4295 		return;
4296 
4297 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4298 				 DISP_TILE_SURFACE_SWIZZLING);
4299 
4300 	if (IS_GEN5(dev_priv))
4301 		return;
4302 
4303 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4304 	if (IS_GEN6(dev_priv))
4305 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4306 	else if (IS_GEN7(dev_priv))
4307 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4308 	else if (IS_GEN8(dev_priv))
4309 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4310 	else
4311 		BUG();
4312 }
4313 
4314 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4315 {
4316 	I915_WRITE(RING_CTL(base), 0);
4317 	I915_WRITE(RING_HEAD(base), 0);
4318 	I915_WRITE(RING_TAIL(base), 0);
4319 	I915_WRITE(RING_START(base), 0);
4320 }
4321 
4322 static void init_unused_rings(struct drm_i915_private *dev_priv)
4323 {
4324 	if (IS_I830(dev_priv)) {
4325 		init_unused_ring(dev_priv, PRB1_BASE);
4326 		init_unused_ring(dev_priv, SRB0_BASE);
4327 		init_unused_ring(dev_priv, SRB1_BASE);
4328 		init_unused_ring(dev_priv, SRB2_BASE);
4329 		init_unused_ring(dev_priv, SRB3_BASE);
4330 	} else if (IS_GEN2(dev_priv)) {
4331 		init_unused_ring(dev_priv, SRB0_BASE);
4332 		init_unused_ring(dev_priv, SRB1_BASE);
4333 	} else if (IS_GEN3(dev_priv)) {
4334 		init_unused_ring(dev_priv, PRB1_BASE);
4335 		init_unused_ring(dev_priv, PRB2_BASE);
4336 	}
4337 }
4338 
4339 int
4340 i915_gem_init_hw(struct drm_device *dev)
4341 {
4342 	struct drm_i915_private *dev_priv = to_i915(dev);
4343 	struct intel_engine_cs *engine;
4344 	enum intel_engine_id id;
4345 	int ret;
4346 
4347 	dev_priv->gt.last_init_time = ktime_get();
4348 
4349 	/* Double layer security blanket, see i915_gem_init() */
4350 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4351 
4352 	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4353 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4354 
4355 	if (IS_HASWELL(dev_priv))
4356 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4357 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4358 
4359 	if (HAS_PCH_NOP(dev_priv)) {
4360 		if (IS_IVYBRIDGE(dev_priv)) {
4361 			u32 temp = I915_READ(GEN7_MSG_CTL);
4362 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4363 			I915_WRITE(GEN7_MSG_CTL, temp);
4364 		} else if (INTEL_GEN(dev_priv) >= 7) {
4365 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4366 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4367 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4368 		}
4369 	}
4370 
4371 	i915_gem_init_swizzling(dev_priv);
4372 
4373 	/*
4374 	 * At least 830 can leave some of the unused rings
4375 	 * "active" (i.e. head != tail) after resume, which
4376 	 * will prevent c3 entry. Make sure all unused rings
4377 	 * are totally idle.
4378 	 */
4379 	init_unused_rings(dev_priv);
4380 
4381 	BUG_ON(!dev_priv->kernel_context);
4382 
4383 	ret = i915_ppgtt_init_hw(dev_priv);
4384 	if (ret) {
4385 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4386 		goto out;
4387 	}
4388 
4389 	/* Need to do basic initialisation of all rings first: */
4390 	for_each_engine(engine, dev_priv, id) {
4391 		ret = engine->init_hw(engine);
4392 		if (ret)
4393 			goto out;
4394 	}
4395 
4396 	intel_mocs_init_l3cc_table(dev);
4397 
4398 	/* We can't enable contexts until all firmware is loaded */
4399 	ret = intel_guc_setup(dev);
4400 	if (ret)
4401 		goto out;
4402 
4403 out:
4404 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4405 	return ret;
4406 }
4407 
4408 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4409 {
4410 	if (INTEL_INFO(dev_priv)->gen < 6)
4411 		return false;
4412 
4413 	/* TODO: make semaphores and Execlists play nicely together */
4414 	if (i915.enable_execlists)
4415 		return false;
4416 
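	/* A non-negative value is an explicit override (e.g. from the
	 * i915.semaphores module parameter) and is used directly; only
	 * fall through to the platform heuristics below for auto (-1).
	 */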
4417 	if (value >= 0)
4418 		return value;
4419 
4420 #ifdef CONFIG_INTEL_IOMMU
4421 	/* Enable semaphores on SNB when IO remapping is off */
4422 	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4423 		return false;
4424 #endif
4425 
4426 	return true;
4427 }
4428 
4429 int i915_gem_init(struct drm_device *dev)
4430 {
4431 	struct drm_i915_private *dev_priv = to_i915(dev);
4432 	int ret;
4433 
4434 	mutex_lock(&dev->struct_mutex);
4435 
4436 	if (!i915.enable_execlists) {
4437 		dev_priv->gt.resume = intel_legacy_submission_resume;
4438 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4439 	} else {
4440 		dev_priv->gt.resume = intel_lr_context_resume;
4441 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4442 	}
4443 
4444 	/* This is just a security blanket to placate dragons.
4445 	 * On some systems, we very sporadically observe that the first TLBs
4446 	 * used by the CS may be stale, despite us poking the TLB reset. If
4447 	 * we hold the forcewake during initialisation these problems
4448 	 * just magically go away.
4449 	 */
4450 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4451 
4452 	i915_gem_init_userptr(dev_priv);
4453 
4454 	ret = i915_gem_init_ggtt(dev_priv);
4455 	if (ret)
4456 		goto out_unlock;
4457 
4458 	ret = i915_gem_context_init(dev);
4459 	if (ret)
4460 		goto out_unlock;
4461 
4462 	ret = intel_engines_init(dev);
4463 	if (ret)
4464 		goto out_unlock;
4465 
4466 	ret = i915_gem_init_hw(dev);
4467 	if (ret == -EIO) {
4468 		/* Allow engine initialisation to fail by marking the GPU as
4469 		 * wedged. But we only want to do this where the GPU is angry,
4470 		 * for all other failures, such as an allocation failure, bail.
4471 		 */
4472 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4473 		i915_gem_set_wedged(dev_priv);
4474 		ret = 0;
4475 	}
4476 
4477 out_unlock:
4478 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4479 	mutex_unlock(&dev->struct_mutex);
4480 
4481 	return ret;
4482 }
4483 
4484 void
4485 i915_gem_cleanup_engines(struct drm_device *dev)
4486 {
4487 	struct drm_i915_private *dev_priv = to_i915(dev);
4488 	struct intel_engine_cs *engine;
4489 	enum intel_engine_id id;
4490 
4491 	for_each_engine(engine, dev_priv, id)
4492 		dev_priv->gt.cleanup_engine(engine);
4493 }
4494 
4495 void
4496 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4497 {
4498 	int i;
4499 
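	/* The number of fence registers provided by the hardware depends on
	 * the platform generation; when running as a vGPU guest we use
	 * however many the host has made available to us.
	 */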
4500 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4501 	    !IS_CHERRYVIEW(dev_priv))
4502 		dev_priv->num_fence_regs = 32;
4503 	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4504 		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4505 		dev_priv->num_fence_regs = 16;
4506 	else
4507 		dev_priv->num_fence_regs = 8;
4508 
4509 	if (intel_vgpu_active(dev_priv))
4510 		dev_priv->num_fence_regs =
4511 				I915_READ(vgtif_reg(avail_rs.fence_num));
4512 
4513 	/* Initialize fence registers to zero */
4514 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4515 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4516 
4517 		fence->i915 = dev_priv;
4518 		fence->id = i;
4519 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4520 	}
4521 	i915_gem_restore_fences(dev_priv);
4522 
4523 	i915_gem_detect_bit_6_swizzle(dev_priv);
4524 }
4525 
4526 int
4527 i915_gem_load_init(struct drm_device *dev)
4528 {
4529 	struct drm_i915_private *dev_priv = to_i915(dev);
4530 	int err = -ENOMEM;
4531 
4532 	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
4533 	if (!dev_priv->objects)
4534 		goto err_out;
4535 
4536 	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
4537 	if (!dev_priv->vmas)
4538 		goto err_objects;
4539 
4540 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
4541 					SLAB_HWCACHE_ALIGN |
4542 					SLAB_RECLAIM_ACCOUNT |
4543 					SLAB_DESTROY_BY_RCU);
4544 	if (!dev_priv->requests)
4545 		goto err_vmas;
4546 
4547 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
4548 					    SLAB_HWCACHE_ALIGN |
4549 					    SLAB_RECLAIM_ACCOUNT);
4550 	if (!dev_priv->dependencies)
4551 		goto err_requests;
4552 
4553 	mutex_lock(&dev_priv->drm.struct_mutex);
4554 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
4555 	err = i915_gem_timeline_init__global(dev_priv);
4556 	mutex_unlock(&dev_priv->drm.struct_mutex);
4557 	if (err)
4558 		goto err_dependencies;
4559 
4560 	INIT_LIST_HEAD(&dev_priv->context_list);
4561 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
4562 	init_llist_head(&dev_priv->mm.free_list);
4563 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4564 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4565 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4566 	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
4567 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4568 			  i915_gem_retire_work_handler);
4569 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4570 			  i915_gem_idle_work_handler);
4571 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4572 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4573 
4574 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4575 
4576 	init_waitqueue_head(&dev_priv->pending_flip_queue);
4577 
4578 	dev_priv->mm.interruptible = true;
4579 
4580 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4581 
4582 	spin_lock_init(&dev_priv->fb_tracking.lock);
4583 
4584 	return 0;
4585 
4586 err_dependencies:
4587 	kmem_cache_destroy(dev_priv->dependencies);
4588 err_requests:
4589 	kmem_cache_destroy(dev_priv->requests);
4590 err_vmas:
4591 	kmem_cache_destroy(dev_priv->vmas);
4592 err_objects:
4593 	kmem_cache_destroy(dev_priv->objects);
4594 err_out:
4595 	return err;
4596 }
4597 
4598 void i915_gem_load_cleanup(struct drm_device *dev)
4599 {
4600 	struct drm_i915_private *dev_priv = to_i915(dev);
4601 
4602 	WARN_ON(!llist_empty(&dev_priv->mm.free_list));
4603 
4604 	mutex_lock(&dev_priv->drm.struct_mutex);
4605 	i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
4606 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
4607 	mutex_unlock(&dev_priv->drm.struct_mutex);
4608 
4609 	kmem_cache_destroy(dev_priv->dependencies);
4610 	kmem_cache_destroy(dev_priv->requests);
4611 	kmem_cache_destroy(dev_priv->vmas);
4612 	kmem_cache_destroy(dev_priv->objects);
4613 
4614 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4615 	rcu_barrier();
4616 }
4617 
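/* Called while preparing for hibernation; shrinking everything up front
 * keeps the amount of driver-owned memory that the hibernation code has to
 * handle to a minimum (see also i915_gem_freeze_late() below).
 */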
4618 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4619 {
4620 	intel_runtime_pm_get(dev_priv);
4621 
4622 	mutex_lock(&dev_priv->drm.struct_mutex);
4623 	i915_gem_shrink_all(dev_priv);
4624 	mutex_unlock(&dev_priv->drm.struct_mutex);
4625 
4626 	intel_runtime_pm_put(dev_priv);
4627 
4628 	return 0;
4629 }
4630 
4631 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4632 {
4633 	struct drm_i915_gem_object *obj;
4634 	struct list_head *phases[] = {
4635 		&dev_priv->mm.unbound_list,
4636 		&dev_priv->mm.bound_list,
4637 		NULL
4638 	}, **p;
4639 
4640 	/* Called just before we write the hibernation image.
4641 	 *
4642 	 * We need to update the domain tracking to reflect that the CPU
4643 	 * will be accessing all the pages to create and restore from the
4644 	 * hibernation, and so upon restoration those pages will be in the
4645 	 * CPU domain.
4646 	 *
4647 	 * To make sure the hibernation image contains the latest state,
4648 	 * we update that state just before writing out the image.
4649 	 *
4650 	 * To try and reduce the hibernation image, we manually shrink
4651 	 * the objects as well.
4652 	 */
4653 
4654 	mutex_lock(&dev_priv->drm.struct_mutex);
4655 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4656 
4657 	for (p = phases; *p; p++) {
4658 		list_for_each_entry(obj, *p, global_link) {
4659 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4660 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4661 		}
4662 	}
4663 	mutex_unlock(&dev_priv->drm.struct_mutex);
4664 
4665 	return 0;
4666 }
4667 
4668 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4669 {
4670 	struct drm_i915_file_private *file_priv = file->driver_priv;
4671 	struct drm_i915_gem_request *request;
4672 
4673 	/* Clean up our request list when the client is going away, so that
4674 	 * later retire_requests won't dereference our soon-to-be-gone
4675 	 * file_priv.
4676 	 */
4677 	spin_lock(&file_priv->mm.lock);
4678 	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4679 		request->file_priv = NULL;
4680 	spin_unlock(&file_priv->mm.lock);
4681 
4682 	if (!list_empty(&file_priv->rps.link)) {
4683 		spin_lock(&to_i915(dev)->rps.client_lock);
4684 		list_del(&file_priv->rps.link);
4685 		spin_unlock(&to_i915(dev)->rps.client_lock);
4686 	}
4687 }
4688 
4689 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4690 {
4691 	struct drm_i915_file_private *file_priv;
4692 	int ret;
4693 
4694 	DRM_DEBUG("\n");
4695 
4696 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4697 	if (!file_priv)
4698 		return -ENOMEM;
4699 
4700 	file->driver_priv = file_priv;
4701 	file_priv->dev_priv = to_i915(dev);
4702 	file_priv->file = file;
4703 	INIT_LIST_HEAD(&file_priv->rps.link);
4704 
4705 	spin_lock_init(&file_priv->mm.lock);
4706 	INIT_LIST_HEAD(&file_priv->mm.request_list);
4707 
4708 	file_priv->bsd_engine = -1;
4709 
4710 	ret = i915_gem_context_open(dev, file);
4711 	if (ret)
4712 		kfree(file_priv);
4713 
4714 	return ret;
4715 }
4716 
4717 /**
4718  * i915_gem_track_fb - update frontbuffer tracking
4719  * @old: current GEM buffer for the frontbuffer slots
4720  * @new: new GEM buffer for the frontbuffer slots
4721  * @frontbuffer_bits: bitmask of frontbuffer slots
4722  *
4723  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4724  * from @old and setting them in @new. Both @old and @new can be NULL.
4725  */
4726 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4727 		       struct drm_i915_gem_object *new,
4728 		       unsigned frontbuffer_bits)
4729 {
4730 	/* Control of individual bits within the mask is guarded by
4731 	 * the owning plane->mutex, i.e. we can never see concurrent
4732 	 * manipulation of individual bits. But since the bitfield as a whole
4733 	 * is updated using RMW, we need to use atomics in order to update
4734 	 * the bits.
4735 	 */
4736 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4737 		     sizeof(atomic_t) * BITS_PER_BYTE);
4738 
4739 	if (old) {
4740 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4741 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4742 	}
4743 
4744 	if (new) {
4745 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4746 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4747 	}
4748 }
4749 
4750 /* Allocate a new GEM object and fill it with the supplied data */
4751 struct drm_i915_gem_object *
4752 i915_gem_object_create_from_data(struct drm_device *dev,
4753 			         const void *data, size_t size)
4754 {
4755 	struct drm_i915_gem_object *obj;
4756 	struct sg_table *sg;
4757 	size_t bytes;
4758 	int ret;
4759 
4760 	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
4761 	if (IS_ERR(obj))
4762 		return obj;
4763 
4764 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4765 	if (ret)
4766 		goto fail;
4767 
4768 	ret = i915_gem_object_pin_pages(obj);
4769 	if (ret)
4770 		goto fail;
4771 
4772 	sg = obj->mm.pages;
4773 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4774 	obj->mm.dirty = true; /* Backing store is now out of date */
4775 	i915_gem_object_unpin_pages(obj);
4776 
4777 	if (WARN_ON(bytes != size)) {
4778 		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4779 		ret = -EFAULT;
4780 		goto fail;
4781 	}
4782 
4783 	return obj;
4784 
4785 fail:
4786 	i915_gem_object_put(obj);
4787 	return ERR_PTR(ret);
4788 }
4789 
4790 struct scatterlist *
4791 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4792 		       unsigned int n,
4793 		       unsigned int *offset)
4794 {
4795 	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4796 	struct scatterlist *sg;
4797 	unsigned int idx, count;
4798 
4799 	might_sleep();
4800 	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4801 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4802 
4803 	/* As we iterate forward through the sg, we record each entry in a
4804 	 * radixtree for quick repeated (backwards) lookups. If we have seen
4805 	 * this index previously, we will have an entry for it.
4806 	 *
4807 	 * Initial lookup is O(N), but this is amortized to O(1) for
4808 	 * sequential page access (where each new request is consecutive
4809 	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4810 	 * i.e. O(1) with a large constant!
4811 	 */
4812 	if (n < READ_ONCE(iter->sg_idx))
4813 		goto lookup;
4814 
4815 	mutex_lock(&iter->lock);
4816 
4817 	/* We prefer to reuse the last sg so that repeated lookups of this
4818 	 * (or the subsequent) sg are fast - comparing against the last
4819 	 * sg is faster than going through the radixtree.
4820 	 */
4821 
4822 	sg = iter->sg_pos;
4823 	idx = iter->sg_idx;
4824 	count = __sg_page_count(sg);
4825 
4826 	while (idx + count <= n) {
4827 		unsigned long exception, i;
4828 		int ret;
4829 
4830 		/* If we cannot allocate and insert this entry, or the
4831 		 * individual pages from this range, cancel updating the
4832 		 * sg_idx so that on this lookup we are forced to linearly
4833 		 * scan onwards, but on future lookups we will try the
4834 		 * insertion again (in which case we need to be careful of
4835 		 * the error return reporting that we have already inserted
4836 		 * this index).
4837 		 */
4838 		ret = radix_tree_insert(&iter->radix, idx, sg);
4839 		if (ret && ret != -EEXIST)
4840 			goto scan;
4841 
4842 		exception =
4843 			RADIX_TREE_EXCEPTIONAL_ENTRY |
4844 			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
4845 		for (i = 1; i < count; i++) {
4846 			ret = radix_tree_insert(&iter->radix, idx + i,
4847 						(void *)exception);
4848 			if (ret && ret != -EEXIST)
4849 				goto scan;
4850 		}
4851 
4852 		idx += count;
4853 		sg = ____sg_next(sg);
4854 		count = __sg_page_count(sg);
4855 	}
4856 
4857 scan:
4858 	iter->sg_pos = sg;
4859 	iter->sg_idx = idx;
4860 
4861 	mutex_unlock(&iter->lock);
4862 
4863 	if (unlikely(n < idx)) /* insertion completed by another thread */
4864 		goto lookup;
4865 
4866 	/* In case we failed to insert the entry into the radixtree, we need
4867 	 * to look beyond the current sg.
4868 	 */
4869 	while (idx + count <= n) {
4870 		idx += count;
4871 		sg = ____sg_next(sg);
4872 		count = __sg_page_count(sg);
4873 	}
4874 
4875 	*offset = n - idx;
4876 	return sg;
4877 
4878 lookup:
4879 	rcu_read_lock();
4880 
4881 	sg = radix_tree_lookup(&iter->radix, n);
4882 	GEM_BUG_ON(!sg);
4883 
4884 	/* If this index is in the middle of a multi-page sg entry,
4885 	 * the radixtree will contain an exceptional entry that points
4886 	 * to the start of that range. We will return the pointer to
4887 	 * the base page and the offset of this page within the
4888 	 * sg entry's range.
4889 	 */
4890 	*offset = 0;
4891 	if (unlikely(radix_tree_exception(sg))) {
4892 		unsigned long base =
4893 			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
4894 
4895 		sg = radix_tree_lookup(&iter->radix, base);
4896 		GEM_BUG_ON(!sg);
4897 
4898 		*offset = n - base;
4899 	}
4900 
4901 	rcu_read_unlock();
4902 
4903 	return sg;
4904 }
4905 
4906 struct page *
4907 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
4908 {
4909 	struct scatterlist *sg;
4910 	unsigned int offset;
4911 
4912 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
4913 
4914 	sg = i915_gem_object_get_sg(obj, n, &offset);
4915 	return nth_page(sg_page(sg), offset);
4916 }
4917 
4918 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
4919 struct page *
4920 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
4921 			       unsigned int n)
4922 {
4923 	struct page *page;
4924 
4925 	page = i915_gem_object_get_page(obj, n);
4926 	if (!obj->mm.dirty)
4927 		set_page_dirty(page);
4928 
4929 	return page;
4930 }
4931 
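/* Return the dma address of the n'th page within the object. The caller
 * must hold a pin on the object's pages (see the GEM_BUG_ON in
 * i915_gem_object_get_sg()).
 */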
4932 dma_addr_t
4933 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
4934 				unsigned long n)
4935 {
4936 	struct scatterlist *sg;
4937 	unsigned int offset;
4938 
4939 	sg = i915_gem_object_get_sg(obj, n, &offset);
4940 	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
4941 }
4942