xref: /openbmc/linux/drivers/gpu/drm/i915/i915_gem.c (revision e52a6321)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drm_vma_manager.h>
29 #include <drm/i915_drm.h>
30 #include <linux/dma-fence-array.h>
31 #include <linux/kthread.h>
32 #include <linux/reservation.h>
33 #include <linux/shmem_fs.h>
34 #include <linux/slab.h>
35 #include <linux/stop_machine.h>
36 #include <linux/swap.h>
37 #include <linux/pci.h>
38 #include <linux/dma-buf.h>
39 #include <linux/mman.h>
40 
41 #include "display/intel_display.h"
42 #include "display/intel_frontbuffer.h"
43 
44 #include "gem/i915_gem_clflush.h"
45 #include "gem/i915_gem_context.h"
46 #include "gem/i915_gem_ioctls.h"
47 #include "gem/i915_gem_pm.h"
48 #include "gem/i915_gemfs.h"
49 #include "gt/intel_engine_pm.h"
50 #include "gt/intel_gt_pm.h"
51 #include "gt/intel_mocs.h"
52 #include "gt/intel_reset.h"
53 #include "gt/intel_workarounds.h"
54 
55 #include "i915_drv.h"
56 #include "i915_scatterlist.h"
57 #include "i915_trace.h"
58 #include "i915_vgpu.h"
59 
60 #include "intel_drv.h"
61 #include "intel_pm.h"
62 
63 static int
64 insert_mappable_node(struct i915_ggtt *ggtt,
65                      struct drm_mm_node *node, u32 size)
66 {
67 	memset(node, 0, sizeof(*node));
68 	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
69 					   size, 0, I915_COLOR_UNEVICTABLE,
70 					   0, ggtt->mappable_end,
71 					   DRM_MM_INSERT_LOW);
72 }
73 
74 static void
75 remove_mappable_node(struct drm_mm_node *node)
76 {
77 	drm_mm_remove_node(node);
78 }
79 
80 int
81 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
82 			    struct drm_file *file)
83 {
84 	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
85 	struct drm_i915_gem_get_aperture *args = data;
86 	struct i915_vma *vma;
87 	u64 pinned;
88 
89 	mutex_lock(&ggtt->vm.mutex);
90 
91 	pinned = ggtt->vm.reserved;
92 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
93 		if (i915_vma_is_pinned(vma))
94 			pinned += vma->node.size;
95 
96 	mutex_unlock(&ggtt->vm.mutex);
97 
98 	args->aper_size = ggtt->vm.total;
99 	args->aper_available_size = args->aper_size - pinned;
100 
101 	return 0;
102 }
103 
104 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
105 {
106 	struct i915_vma *vma;
107 	LIST_HEAD(still_in_list);
108 	int ret = 0;
109 
110 	lockdep_assert_held(&obj->base.dev->struct_mutex);
111 
112 	spin_lock(&obj->vma.lock);
113 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
114 						       struct i915_vma,
115 						       obj_link))) {
116 		list_move_tail(&vma->obj_link, &still_in_list);
117 		spin_unlock(&obj->vma.lock);
118 
119 		ret = i915_vma_unbind(vma);
120 
121 		spin_lock(&obj->vma.lock);
122 	}
123 	list_splice(&still_in_list, &obj->vma.list);
124 	spin_unlock(&obj->vma.lock);
125 
126 	return ret;
127 }
128 
129 static int
130 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
131 		     struct drm_i915_gem_pwrite *args,
132 		     struct drm_file *file)
133 {
134 	void *vaddr = obj->phys_handle->vaddr + args->offset;
135 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
136 
137 	/* We manually control the domain here and pretend that it
138 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
139 	 */
140 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
141 	if (copy_from_user(vaddr, user_data, args->size))
142 		return -EFAULT;
143 
144 	drm_clflush_virt_range(vaddr, args->size);
145 	i915_gem_chipset_flush(to_i915(obj->base.dev));
146 
147 	intel_fb_obj_flush(obj, ORIGIN_CPU);
148 	return 0;
149 }
150 
151 static int
152 i915_gem_create(struct drm_file *file,
153 		struct drm_i915_private *dev_priv,
154 		u64 *size_p,
155 		u32 *handle_p)
156 {
157 	struct drm_i915_gem_object *obj;
158 	u32 handle;
159 	u64 size;
160 	int ret;
161 
162 	size = round_up(*size_p, PAGE_SIZE);
163 	if (size == 0)
164 		return -EINVAL;
165 
166 	/* Allocate the new object */
167 	obj = i915_gem_object_create_shmem(dev_priv, size);
168 	if (IS_ERR(obj))
169 		return PTR_ERR(obj);
170 
171 	ret = drm_gem_handle_create(file, &obj->base, &handle);
172 	/* drop reference from allocate - handle holds it now */
173 	i915_gem_object_put(obj);
174 	if (ret)
175 		return ret;
176 
177 	*handle_p = handle;
178 	*size_p = size;
179 	return 0;
180 }
181 
182 int
183 i915_gem_dumb_create(struct drm_file *file,
184 		     struct drm_device *dev,
185 		     struct drm_mode_create_dumb *args)
186 {
187 	int cpp = DIV_ROUND_UP(args->bpp, 8);
188 	u32 format;
189 
190 	switch (cpp) {
191 	case 1:
192 		format = DRM_FORMAT_C8;
193 		break;
194 	case 2:
195 		format = DRM_FORMAT_RGB565;
196 		break;
197 	case 4:
198 		format = DRM_FORMAT_XRGB8888;
199 		break;
200 	default:
201 		return -EINVAL;
202 	}
203 
204 	/* have to work out size/pitch and return them */
205 	args->pitch = ALIGN(args->width * cpp, 64);
206 
207 	/* align stride to page size so that we can remap */
208 	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
209 						    DRM_FORMAT_MOD_LINEAR))
210 		args->pitch = ALIGN(args->pitch, 4096);
211 
212 	args->size = args->pitch * args->height;
213 	return i915_gem_create(file, to_i915(dev),
214 			       &args->size, &args->handle);
215 }
216 
217 /**
218  * Creates a new mm object and returns a handle to it.
219  * @dev: drm device pointer
220  * @data: ioctl data blob
221  * @file: drm file pointer
222  */
223 int
224 i915_gem_create_ioctl(struct drm_device *dev, void *data,
225 		      struct drm_file *file)
226 {
227 	struct drm_i915_private *dev_priv = to_i915(dev);
228 	struct drm_i915_gem_create *args = data;
229 
230 	i915_gem_flush_free_objects(dev_priv);
231 
232 	return i915_gem_create(file, dev_priv,
233 			       &args->size, &args->handle);
234 }
235 
236 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
237 {
238 	intel_wakeref_t wakeref;
239 
240 	/*
241 	 * No actual flushing is required for the GTT write domain for reads
242 	 * from the GTT domain. Writes to it "immediately" go to main memory
243 	 * as far as we know, so there's no chipset flush. It also doesn't
244 	 * land in the GPU render cache.
245 	 *
246 	 * However, we do have to enforce the order so that all writes through
247 	 * the GTT land before any writes to the device, such as updates to
248 	 * the GATT itself.
249 	 *
250 	 * We also have to wait a bit for the writes to land from the GTT.
251 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
252 	 * timing. This issue has only been observed when switching quickly
253 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
254 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
255 	 * system agents we cannot reproduce this behaviour, until Cannonlake
256 	 * that was!).
257 	 */
258 
259 	wmb();
260 
261 	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
262 		return;
263 
264 	i915_gem_chipset_flush(dev_priv);
265 
266 	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
267 		struct intel_uncore *uncore = &dev_priv->uncore;
268 
269 		spin_lock_irq(&uncore->lock);
270 		intel_uncore_posting_read_fw(uncore,
271 					     RING_HEAD(RENDER_RING_BASE));
272 		spin_unlock_irq(&uncore->lock);
273 	}
274 }
275 
276 static int
277 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
278 	    bool needs_clflush)
279 {
280 	char *vaddr;
281 	int ret;
282 
283 	vaddr = kmap(page);
284 
285 	if (needs_clflush)
286 		drm_clflush_virt_range(vaddr + offset, len);
287 
288 	ret = __copy_to_user(user_data, vaddr + offset, len);
289 
290 	kunmap(page);
291 
292 	return ret ? -EFAULT : 0;
293 }
294 
295 static int
296 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
297 		     struct drm_i915_gem_pread *args)
298 {
299 	unsigned int needs_clflush;
300 	unsigned int idx, offset;
301 	struct dma_fence *fence;
302 	char __user *user_data;
303 	u64 remain;
304 	int ret;
305 
306 	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
307 	if (ret)
308 		return ret;
309 
310 	fence = i915_gem_object_lock_fence(obj);
311 	i915_gem_object_finish_access(obj);
312 	if (!fence)
313 		return -ENOMEM;
314 
315 	remain = args->size;
316 	user_data = u64_to_user_ptr(args->data_ptr);
317 	offset = offset_in_page(args->offset);
318 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
319 		struct page *page = i915_gem_object_get_page(obj, idx);
320 		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
321 
322 		ret = shmem_pread(page, offset, length, user_data,
323 				  needs_clflush);
324 		if (ret)
325 			break;
326 
327 		remain -= length;
328 		user_data += length;
329 		offset = 0;
330 	}
331 
332 	i915_gem_object_unlock_fence(obj, fence);
333 	return ret;
334 }
335 
336 static inline bool
337 gtt_user_read(struct io_mapping *mapping,
338 	      loff_t base, int offset,
339 	      char __user *user_data, int length)
340 {
341 	void __iomem *vaddr;
342 	unsigned long unwritten;
343 
344 	/* We can use the cpu mem copy function because this is X86. */
345 	vaddr = io_mapping_map_atomic_wc(mapping, base);
346 	unwritten = __copy_to_user_inatomic(user_data,
347 					    (void __force *)vaddr + offset,
348 					    length);
349 	io_mapping_unmap_atomic(vaddr);
350 	if (unwritten) {
351 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
352 		unwritten = copy_to_user(user_data,
353 					 (void __force *)vaddr + offset,
354 					 length);
355 		io_mapping_unmap(vaddr);
356 	}
357 	return unwritten;
358 }
359 
360 static int
361 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
362 		   const struct drm_i915_gem_pread *args)
363 {
364 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
365 	struct i915_ggtt *ggtt = &i915->ggtt;
366 	intel_wakeref_t wakeref;
367 	struct drm_mm_node node;
368 	struct dma_fence *fence;
369 	void __user *user_data;
370 	struct i915_vma *vma;
371 	u64 remain, offset;
372 	int ret;
373 
374 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
375 	if (ret)
376 		return ret;
377 
378 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
379 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
380 				       PIN_MAPPABLE |
381 				       PIN_NONFAULT |
382 				       PIN_NONBLOCK);
383 	if (!IS_ERR(vma)) {
384 		node.start = i915_ggtt_offset(vma);
385 		node.allocated = false;
386 		ret = i915_vma_put_fence(vma);
387 		if (ret) {
388 			i915_vma_unpin(vma);
389 			vma = ERR_PTR(ret);
390 		}
391 	}
392 	if (IS_ERR(vma)) {
393 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
394 		if (ret)
395 			goto out_unlock;
396 		GEM_BUG_ON(!node.allocated);
397 	}
398 
399 	mutex_unlock(&i915->drm.struct_mutex);
400 
401 	ret = i915_gem_object_lock_interruptible(obj);
402 	if (ret)
403 		goto out_unpin;
404 
405 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
406 	if (ret) {
407 		i915_gem_object_unlock(obj);
408 		goto out_unpin;
409 	}
410 
411 	fence = i915_gem_object_lock_fence(obj);
412 	i915_gem_object_unlock(obj);
413 	if (!fence) {
414 		ret = -ENOMEM;
415 		goto out_unpin;
416 	}
417 
418 	user_data = u64_to_user_ptr(args->data_ptr);
419 	remain = args->size;
420 	offset = args->offset;
421 
422 	while (remain > 0) {
423 		/* Operation in this page
424 		 *
425 		 * page_base = page offset within aperture
426 		 * page_offset = offset within page
427 		 * page_length = bytes to copy for this page
428 		 */
429 		u32 page_base = node.start;
430 		unsigned page_offset = offset_in_page(offset);
431 		unsigned page_length = PAGE_SIZE - page_offset;
432 		page_length = remain < page_length ? remain : page_length;
433 		if (node.allocated) {
434 			wmb();
435 			ggtt->vm.insert_page(&ggtt->vm,
436 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
437 					     node.start, I915_CACHE_NONE, 0);
438 			wmb();
439 		} else {
440 			page_base += offset & PAGE_MASK;
441 		}
442 
443 		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
444 				  user_data, page_length)) {
445 			ret = -EFAULT;
446 			break;
447 		}
448 
449 		remain -= page_length;
450 		user_data += page_length;
451 		offset += page_length;
452 	}
453 
454 	i915_gem_object_unlock_fence(obj, fence);
455 out_unpin:
456 	mutex_lock(&i915->drm.struct_mutex);
457 	if (node.allocated) {
458 		wmb();
459 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
460 		remove_mappable_node(&node);
461 	} else {
462 		i915_vma_unpin(vma);
463 	}
464 out_unlock:
465 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
466 	mutex_unlock(&i915->drm.struct_mutex);
467 
468 	return ret;
469 }
470 
471 /**
472  * Reads data from the object referenced by handle.
473  * @dev: drm device pointer
474  * @data: ioctl data blob
475  * @file: drm file pointer
476  *
477  * On error, the contents of *data are undefined.
478  */
479 int
480 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
481 		     struct drm_file *file)
482 {
483 	struct drm_i915_gem_pread *args = data;
484 	struct drm_i915_gem_object *obj;
485 	int ret;
486 
487 	if (args->size == 0)
488 		return 0;
489 
490 	if (!access_ok(u64_to_user_ptr(args->data_ptr),
491 		       args->size))
492 		return -EFAULT;
493 
494 	obj = i915_gem_object_lookup(file, args->handle);
495 	if (!obj)
496 		return -ENOENT;
497 
498 	/* Bounds check source.  */
499 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
500 		ret = -EINVAL;
501 		goto out;
502 	}
503 
504 	trace_i915_gem_object_pread(obj, args->offset, args->size);
505 
506 	ret = i915_gem_object_wait(obj,
507 				   I915_WAIT_INTERRUPTIBLE,
508 				   MAX_SCHEDULE_TIMEOUT);
509 	if (ret)
510 		goto out;
511 
512 	ret = i915_gem_object_pin_pages(obj);
513 	if (ret)
514 		goto out;
515 
516 	ret = i915_gem_shmem_pread(obj, args);
517 	if (ret == -EFAULT || ret == -ENODEV)
518 		ret = i915_gem_gtt_pread(obj, args);
519 
520 	i915_gem_object_unpin_pages(obj);
521 out:
522 	i915_gem_object_put(obj);
523 	return ret;
524 }
525 
526 /* This is the fast write path which cannot handle
527  * page faults in the source data
528  */
529 
530 static inline bool
531 ggtt_write(struct io_mapping *mapping,
532 	   loff_t base, int offset,
533 	   char __user *user_data, int length)
534 {
535 	void __iomem *vaddr;
536 	unsigned long unwritten;
537 
538 	/* We can use the cpu mem copy function because this is X86. */
539 	vaddr = io_mapping_map_atomic_wc(mapping, base);
540 	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
541 						      user_data, length);
542 	io_mapping_unmap_atomic(vaddr);
543 	if (unwritten) {
544 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
545 		unwritten = copy_from_user((void __force *)vaddr + offset,
546 					   user_data, length);
547 		io_mapping_unmap(vaddr);
548 	}
549 
550 	return unwritten;
551 }
552 
553 /**
554  * This is the fast pwrite path, where we copy the data directly from the
555  * user into the GTT, uncached.
556  * @obj: i915 GEM object
557  * @args: pwrite arguments structure
558  */
559 static int
560 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
561 			 const struct drm_i915_gem_pwrite *args)
562 {
563 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
564 	struct i915_ggtt *ggtt = &i915->ggtt;
565 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
566 	intel_wakeref_t wakeref;
567 	struct drm_mm_node node;
568 	struct dma_fence *fence;
569 	struct i915_vma *vma;
570 	u64 remain, offset;
571 	void __user *user_data;
572 	int ret;
573 
574 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
575 	if (ret)
576 		return ret;
577 
578 	if (i915_gem_object_has_struct_page(obj)) {
579 		/*
580 		 * Avoid waking the device up if we can fallback, as
581 		 * waking/resuming is very slow (worst-case 10-100 ms
582 		 * depending on PCI sleeps and our own resume time).
583 		 * This easily dwarfs any performance advantage from
584 		 * using the cache bypass of indirect GGTT access.
585 		 */
586 		wakeref = intel_runtime_pm_get_if_in_use(rpm);
587 		if (!wakeref) {
588 			ret = -EFAULT;
589 			goto out_unlock;
590 		}
591 	} else {
592 		/* No backing pages, no fallback, we must force GGTT access */
593 		wakeref = intel_runtime_pm_get(rpm);
594 	}
595 
596 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
597 				       PIN_MAPPABLE |
598 				       PIN_NONFAULT |
599 				       PIN_NONBLOCK);
600 	if (!IS_ERR(vma)) {
601 		node.start = i915_ggtt_offset(vma);
602 		node.allocated = false;
603 		ret = i915_vma_put_fence(vma);
604 		if (ret) {
605 			i915_vma_unpin(vma);
606 			vma = ERR_PTR(ret);
607 		}
608 	}
609 	if (IS_ERR(vma)) {
610 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
611 		if (ret)
612 			goto out_rpm;
613 		GEM_BUG_ON(!node.allocated);
614 	}
615 
616 	mutex_unlock(&i915->drm.struct_mutex);
617 
618 	ret = i915_gem_object_lock_interruptible(obj);
619 	if (ret)
620 		goto out_unpin;
621 
622 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
623 	if (ret) {
624 		i915_gem_object_unlock(obj);
625 		goto out_unpin;
626 	}
627 
628 	fence = i915_gem_object_lock_fence(obj);
629 	i915_gem_object_unlock(obj);
630 	if (!fence) {
631 		ret = -ENOMEM;
632 		goto out_unpin;
633 	}
634 
635 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
636 
637 	user_data = u64_to_user_ptr(args->data_ptr);
638 	offset = args->offset;
639 	remain = args->size;
640 	while (remain) {
641 		/* Operation in this page
642 		 *
643 		 * page_base = page offset within aperture
644 		 * page_offset = offset within page
645 		 * page_length = bytes to copy for this page
646 		 */
647 		u32 page_base = node.start;
648 		unsigned int page_offset = offset_in_page(offset);
649 		unsigned int page_length = PAGE_SIZE - page_offset;
650 		page_length = remain < page_length ? remain : page_length;
651 		if (node.allocated) {
652 			wmb(); /* flush the write before we modify the GGTT */
653 			ggtt->vm.insert_page(&ggtt->vm,
654 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
655 					     node.start, I915_CACHE_NONE, 0);
656 			wmb(); /* flush modifications to the GGTT (insert_page) */
657 		} else {
658 			page_base += offset & PAGE_MASK;
659 		}
660 		/* If we get a fault while copying data, then (presumably) our
661 		 * source page isn't available.  Return the error and we'll
662 		 * retry in the slow path.
663 		 * If the object is non-shmem backed, we retry again with the
664 		 * path that handles page fault.
665 		 */
666 		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
667 			       user_data, page_length)) {
668 			ret = -EFAULT;
669 			break;
670 		}
671 
672 		remain -= page_length;
673 		user_data += page_length;
674 		offset += page_length;
675 	}
676 	intel_fb_obj_flush(obj, ORIGIN_CPU);
677 
678 	i915_gem_object_unlock_fence(obj, fence);
679 out_unpin:
680 	mutex_lock(&i915->drm.struct_mutex);
681 	if (node.allocated) {
682 		wmb();
683 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
684 		remove_mappable_node(&node);
685 	} else {
686 		i915_vma_unpin(vma);
687 	}
688 out_rpm:
689 	intel_runtime_pm_put(rpm, wakeref);
690 out_unlock:
691 	mutex_unlock(&i915->drm.struct_mutex);
692 	return ret;
693 }
694 
695 /* Per-page copy function for the shmem pwrite fastpath.
696  * Flushes invalid cachelines before writing to the target if
697  * needs_clflush_before is set and flushes out any written cachelines after
698  * writing if needs_clflush is set.
699  */
700 static int
701 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
702 	     bool needs_clflush_before,
703 	     bool needs_clflush_after)
704 {
705 	char *vaddr;
706 	int ret;
707 
708 	vaddr = kmap(page);
709 
710 	if (needs_clflush_before)
711 		drm_clflush_virt_range(vaddr + offset, len);
712 
713 	ret = __copy_from_user(vaddr + offset, user_data, len);
714 	if (!ret && needs_clflush_after)
715 		drm_clflush_virt_range(vaddr + offset, len);
716 
717 	kunmap(page);
718 
719 	return ret ? -EFAULT : 0;
720 }
721 
722 static int
723 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
724 		      const struct drm_i915_gem_pwrite *args)
725 {
726 	unsigned int partial_cacheline_write;
727 	unsigned int needs_clflush;
728 	unsigned int offset, idx;
729 	struct dma_fence *fence;
730 	void __user *user_data;
731 	u64 remain;
732 	int ret;
733 
734 	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
735 	if (ret)
736 		return ret;
737 
738 	fence = i915_gem_object_lock_fence(obj);
739 	i915_gem_object_finish_access(obj);
740 	if (!fence)
741 		return -ENOMEM;
742 
743 	/* If we don't overwrite a cacheline completely we need to be
744 	 * careful to have up-to-date data by first clflushing. Don't
745 	 * overcomplicate things and flush the entire patch.
746 	 */
747 	partial_cacheline_write = 0;
748 	if (needs_clflush & CLFLUSH_BEFORE)
749 		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
750 
751 	user_data = u64_to_user_ptr(args->data_ptr);
752 	remain = args->size;
753 	offset = offset_in_page(args->offset);
754 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
755 		struct page *page = i915_gem_object_get_page(obj, idx);
756 		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
757 
758 		ret = shmem_pwrite(page, offset, length, user_data,
759 				   (offset | length) & partial_cacheline_write,
760 				   needs_clflush & CLFLUSH_AFTER);
761 		if (ret)
762 			break;
763 
764 		remain -= length;
765 		user_data += length;
766 		offset = 0;
767 	}
768 
769 	intel_fb_obj_flush(obj, ORIGIN_CPU);
770 	i915_gem_object_unlock_fence(obj, fence);
771 
772 	return ret;
773 }
774 
775 /**
776  * Writes data to the object referenced by handle.
777  * @dev: drm device
778  * @data: ioctl data blob
779  * @file: drm file
780  *
781  * On error, the contents of the buffer that were to be modified are undefined.
782  */
783 int
784 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
785 		      struct drm_file *file)
786 {
787 	struct drm_i915_gem_pwrite *args = data;
788 	struct drm_i915_gem_object *obj;
789 	int ret;
790 
791 	if (args->size == 0)
792 		return 0;
793 
794 	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
795 		return -EFAULT;
796 
797 	obj = i915_gem_object_lookup(file, args->handle);
798 	if (!obj)
799 		return -ENOENT;
800 
801 	/* Bounds check destination. */
802 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
803 		ret = -EINVAL;
804 		goto err;
805 	}
806 
807 	/* Writes not allowed into this read-only object */
808 	if (i915_gem_object_is_readonly(obj)) {
809 		ret = -EINVAL;
810 		goto err;
811 	}
812 
813 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
814 
815 	ret = -ENODEV;
816 	if (obj->ops->pwrite)
817 		ret = obj->ops->pwrite(obj, args);
818 	if (ret != -ENODEV)
819 		goto err;
820 
821 	ret = i915_gem_object_wait(obj,
822 				   I915_WAIT_INTERRUPTIBLE |
823 				   I915_WAIT_ALL,
824 				   MAX_SCHEDULE_TIMEOUT);
825 	if (ret)
826 		goto err;
827 
828 	ret = i915_gem_object_pin_pages(obj);
829 	if (ret)
830 		goto err;
831 
832 	ret = -EFAULT;
833 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
834 	 * it would end up going through the fenced access, and we'll get
835 	 * different detiling behavior between reading and writing.
836 	 * pread/pwrite currently are reading and writing from the CPU
837 	 * perspective, requiring manual detiling by the client.
838 	 */
839 	if (!i915_gem_object_has_struct_page(obj) ||
840 	    cpu_write_needs_clflush(obj))
841 		/* Note that the gtt paths might fail with non-page-backed user
842 		 * pointers (e.g. gtt mappings when moving data between
843 		 * textures). Fallback to the shmem path in that case.
844 		 */
845 		ret = i915_gem_gtt_pwrite_fast(obj, args);
846 
847 	if (ret == -EFAULT || ret == -ENOSPC) {
848 		if (obj->phys_handle)
849 			ret = i915_gem_phys_pwrite(obj, args, file);
850 		else
851 			ret = i915_gem_shmem_pwrite(obj, args);
852 	}
853 
854 	i915_gem_object_unpin_pages(obj);
855 err:
856 	i915_gem_object_put(obj);
857 	return ret;
858 }
859 
860 /**
861  * Called when user space has done writes to this buffer
862  * @dev: drm device
863  * @data: ioctl data blob
864  * @file: drm file
865  */
866 int
867 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
868 			 struct drm_file *file)
869 {
870 	struct drm_i915_gem_sw_finish *args = data;
871 	struct drm_i915_gem_object *obj;
872 
873 	obj = i915_gem_object_lookup(file, args->handle);
874 	if (!obj)
875 		return -ENOENT;
876 
877 	/*
878 	 * Proxy objects are barred from CPU access, so there is no
879 	 * need to ban sw_finish as it is a nop.
880 	 */
881 
882 	/* Pinned buffers may be scanout, so flush the cache */
883 	i915_gem_object_flush_if_display(obj);
884 	i915_gem_object_put(obj);
885 
886 	return 0;
887 }
888 
889 void i915_gem_runtime_suspend(struct drm_i915_private *i915)
890 {
891 	struct drm_i915_gem_object *obj, *on;
892 	int i;
893 
894 	/*
895 	 * Only called during RPM suspend. All users of the userfault_list
896 	 * must be holding an RPM wakeref to ensure that this can not
897 	 * run concurrently with themselves (and use the struct_mutex for
898 	 * protection between themselves).
899 	 */
900 
901 	list_for_each_entry_safe(obj, on,
902 				 &i915->ggtt.userfault_list, userfault_link)
903 		__i915_gem_object_release_mmap(obj);
904 
905 	/*
906 	 * The fence will be lost when the device powers down. If any were
907 	 * in use by hardware (i.e. they are pinned), we should not be powering
908 	 * down! All other fences will be reacquired by the user upon waking.
909 	 */
910 	for (i = 0; i < i915->ggtt.num_fences; i++) {
911 		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
912 
913 		/*
914 		 * Ideally we want to assert that the fence register is not
915 		 * live at this point (i.e. that no piece of code will be
916 		 * trying to write through fence + GTT, as that both violates
917 		 * our tracking of activity and associated locking/barriers,
918 		 * but also is illegal given that the hw is powered down).
919 		 *
920 		 * Previously we used reg->pin_count as a "liveness" indicator.
921 		 * That is not sufficient, and we need a more fine-grained
922 		 * tool if we want to have a sanity check here.
923 		 */
924 
925 		if (!reg->vma)
926 			continue;
927 
928 		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
929 		reg->dirty = true;
930 	}
931 }
932 
933 static int wait_for_engines(struct drm_i915_private *i915)
934 {
935 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
936 		dev_err(i915->drm.dev,
937 			"Failed to idle engines, declaring wedged!\n");
938 		GEM_TRACE_DUMP();
939 		i915_gem_set_wedged(i915);
940 		return -EIO;
941 	}
942 
943 	return 0;
944 }
945 
946 static long
947 wait_for_timelines(struct drm_i915_private *i915,
948 		   unsigned int flags, long timeout)
949 {
950 	struct i915_gt_timelines *gt = &i915->gt.timelines;
951 	struct i915_timeline *tl;
952 
953 	mutex_lock(&gt->mutex);
954 	list_for_each_entry(tl, &gt->active_list, link) {
955 		struct i915_request *rq;
956 
957 		rq = i915_active_request_get_unlocked(&tl->last_request);
958 		if (!rq)
959 			continue;
960 
961 		mutex_unlock(&gt->mutex);
962 
963 		/*
964 		 * "Race-to-idle".
965 		 *
966 		 * Switching to the kernel context is often used a synchronous
967 		 * step prior to idling, e.g. in suspend for flushing all
968 		 * current operations to memory before sleeping. These we
969 		 * want to complete as quickly as possible to avoid prolonged
970 		 * stalls, so allow the gpu to boost to maximum clocks.
971 		 */
972 		if (flags & I915_WAIT_FOR_IDLE_BOOST)
973 			gen6_rps_boost(rq);
974 
975 		timeout = i915_request_wait(rq, flags, timeout);
976 		i915_request_put(rq);
977 		if (timeout < 0)
978 			return timeout;
979 
980 		/* restart after reacquiring the lock */
981 		mutex_lock(&gt->mutex);
982 		tl = list_entry(&gt->active_list, typeof(*tl), link);
983 	}
984 	mutex_unlock(&gt->mutex);
985 
986 	return timeout;
987 }
988 
989 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
990 			   unsigned int flags, long timeout)
991 {
992 	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
993 		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
994 		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
995 		  yesno(i915->gt.awake));
996 
997 	/* If the device is asleep, we have no requests outstanding */
998 	if (!READ_ONCE(i915->gt.awake))
999 		return 0;
1000 
1001 	timeout = wait_for_timelines(i915, flags, timeout);
1002 	if (timeout < 0)
1003 		return timeout;
1004 
1005 	if (flags & I915_WAIT_LOCKED) {
1006 		int err;
1007 
1008 		lockdep_assert_held(&i915->drm.struct_mutex);
1009 
1010 		err = wait_for_engines(i915);
1011 		if (err)
1012 			return err;
1013 
1014 		i915_retire_requests(i915);
1015 	}
1016 
1017 	return 0;
1018 }
1019 
1020 struct i915_vma *
1021 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
1022 			 const struct i915_ggtt_view *view,
1023 			 u64 size,
1024 			 u64 alignment,
1025 			 u64 flags)
1026 {
1027 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1028 	struct i915_address_space *vm = &dev_priv->ggtt.vm;
1029 	struct i915_vma *vma;
1030 	int ret;
1031 
1032 	lockdep_assert_held(&obj->base.dev->struct_mutex);
1033 
1034 	if (flags & PIN_MAPPABLE &&
1035 	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
1036 		/* If the required space is larger than the available
1037 		 * aperture, we will not able to find a slot for the
1038 		 * object and unbinding the object now will be in
1039 		 * vain. Worse, doing so may cause us to ping-pong
1040 		 * the object in and out of the Global GTT and
1041 		 * waste a lot of cycles under the mutex.
1042 		 */
1043 		if (obj->base.size > dev_priv->ggtt.mappable_end)
1044 			return ERR_PTR(-E2BIG);
1045 
1046 		/* If NONBLOCK is set the caller is optimistically
1047 		 * trying to cache the full object within the mappable
1048 		 * aperture, and *must* have a fallback in place for
1049 		 * situations where we cannot bind the object. We
1050 		 * can be a little more lax here and use the fallback
1051 		 * more often to avoid costly migrations of ourselves
1052 		 * and other objects within the aperture.
1053 		 *
1054 		 * Half-the-aperture is used as a simple heuristic.
1055 		 * More interesting would to do search for a free
1056 		 * block prior to making the commitment to unbind.
1057 		 * That caters for the self-harm case, and with a
1058 		 * little more heuristics (e.g. NOFAULT, NOEVICT)
1059 		 * we could try to minimise harm to others.
1060 		 */
1061 		if (flags & PIN_NONBLOCK &&
1062 		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
1063 			return ERR_PTR(-ENOSPC);
1064 	}
1065 
1066 	vma = i915_vma_instance(obj, vm, view);
1067 	if (IS_ERR(vma))
1068 		return vma;
1069 
1070 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
1071 		if (flags & PIN_NONBLOCK) {
1072 			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
1073 				return ERR_PTR(-ENOSPC);
1074 
1075 			if (flags & PIN_MAPPABLE &&
1076 			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
1077 				return ERR_PTR(-ENOSPC);
1078 		}
1079 
1080 		WARN(i915_vma_is_pinned(vma),
1081 		     "bo is already pinned in ggtt with incorrect alignment:"
1082 		     " offset=%08x, req.alignment=%llx,"
1083 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
1084 		     i915_ggtt_offset(vma), alignment,
1085 		     !!(flags & PIN_MAPPABLE),
1086 		     i915_vma_is_map_and_fenceable(vma));
1087 		ret = i915_vma_unbind(vma);
1088 		if (ret)
1089 			return ERR_PTR(ret);
1090 	}
1091 
1092 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
1093 	if (ret)
1094 		return ERR_PTR(ret);
1095 
1096 	return vma;
1097 }
1098 
1099 int
1100 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
1101 		       struct drm_file *file_priv)
1102 {
1103 	struct drm_i915_private *i915 = to_i915(dev);
1104 	struct drm_i915_gem_madvise *args = data;
1105 	struct drm_i915_gem_object *obj;
1106 	int err;
1107 
1108 	switch (args->madv) {
1109 	case I915_MADV_DONTNEED:
1110 	case I915_MADV_WILLNEED:
1111 	    break;
1112 	default:
1113 	    return -EINVAL;
1114 	}
1115 
1116 	obj = i915_gem_object_lookup(file_priv, args->handle);
1117 	if (!obj)
1118 		return -ENOENT;
1119 
1120 	err = mutex_lock_interruptible(&obj->mm.lock);
1121 	if (err)
1122 		goto out;
1123 
1124 	if (i915_gem_object_has_pages(obj) &&
1125 	    i915_gem_object_is_tiled(obj) &&
1126 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
1127 		if (obj->mm.madv == I915_MADV_WILLNEED) {
1128 			GEM_BUG_ON(!obj->mm.quirked);
1129 			__i915_gem_object_unpin_pages(obj);
1130 			obj->mm.quirked = false;
1131 		}
1132 		if (args->madv == I915_MADV_WILLNEED) {
1133 			GEM_BUG_ON(obj->mm.quirked);
1134 			__i915_gem_object_pin_pages(obj);
1135 			obj->mm.quirked = true;
1136 		}
1137 	}
1138 
1139 	if (obj->mm.madv != __I915_MADV_PURGED)
1140 		obj->mm.madv = args->madv;
1141 
1142 	if (i915_gem_object_has_pages(obj)) {
1143 		struct list_head *list;
1144 
1145 		if (i915_gem_object_is_shrinkable(obj)) {
1146 			unsigned long flags;
1147 
1148 			spin_lock_irqsave(&i915->mm.obj_lock, flags);
1149 
1150 			if (obj->mm.madv != I915_MADV_WILLNEED)
1151 				list = &i915->mm.purge_list;
1152 			else
1153 				list = &i915->mm.shrink_list;
1154 			list_move_tail(&obj->mm.link, list);
1155 
1156 			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
1157 		}
1158 	}
1159 
1160 	/* if the object is no longer attached, discard its backing storage */
1161 	if (obj->mm.madv == I915_MADV_DONTNEED &&
1162 	    !i915_gem_object_has_pages(obj))
1163 		i915_gem_object_truncate(obj);
1164 
1165 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
1166 	mutex_unlock(&obj->mm.lock);
1167 
1168 out:
1169 	i915_gem_object_put(obj);
1170 	return err;
1171 }
1172 
1173 void i915_gem_sanitize(struct drm_i915_private *i915)
1174 {
1175 	intel_wakeref_t wakeref;
1176 
1177 	GEM_TRACE("\n");
1178 
1179 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1180 	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
1181 
1182 	/*
1183 	 * As we have just resumed the machine and woken the device up from
1184 	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
1185 	 * back to defaults, recovering from whatever wedged state we left it
1186 	 * in and so worth trying to use the device once more.
1187 	 */
1188 	if (i915_terminally_wedged(i915))
1189 		i915_gem_unset_wedged(i915);
1190 
1191 	/*
1192 	 * If we inherit context state from the BIOS or earlier occupants
1193 	 * of the GPU, the GPU may be in an inconsistent state when we
1194 	 * try to take over. The only way to remove the earlier state
1195 	 * is by resetting. However, resetting on earlier gen is tricky as
1196 	 * it may impact the display and we are uncertain about the stability
1197 	 * of the reset, so this could be applied to even earlier gen.
1198 	 */
1199 	intel_gt_sanitize(i915, false);
1200 
1201 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
1202 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1203 }
1204 
1205 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
1206 {
1207 	if (INTEL_GEN(dev_priv) < 5 ||
1208 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
1209 		return;
1210 
1211 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
1212 				 DISP_TILE_SURFACE_SWIZZLING);
1213 
1214 	if (IS_GEN(dev_priv, 5))
1215 		return;
1216 
1217 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
1218 	if (IS_GEN(dev_priv, 6))
1219 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
1220 	else if (IS_GEN(dev_priv, 7))
1221 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
1222 	else if (IS_GEN(dev_priv, 8))
1223 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
1224 	else
1225 		BUG();
1226 }
1227 
1228 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
1229 {
1230 	I915_WRITE(RING_CTL(base), 0);
1231 	I915_WRITE(RING_HEAD(base), 0);
1232 	I915_WRITE(RING_TAIL(base), 0);
1233 	I915_WRITE(RING_START(base), 0);
1234 }
1235 
1236 static void init_unused_rings(struct drm_i915_private *dev_priv)
1237 {
1238 	if (IS_I830(dev_priv)) {
1239 		init_unused_ring(dev_priv, PRB1_BASE);
1240 		init_unused_ring(dev_priv, SRB0_BASE);
1241 		init_unused_ring(dev_priv, SRB1_BASE);
1242 		init_unused_ring(dev_priv, SRB2_BASE);
1243 		init_unused_ring(dev_priv, SRB3_BASE);
1244 	} else if (IS_GEN(dev_priv, 2)) {
1245 		init_unused_ring(dev_priv, SRB0_BASE);
1246 		init_unused_ring(dev_priv, SRB1_BASE);
1247 	} else if (IS_GEN(dev_priv, 3)) {
1248 		init_unused_ring(dev_priv, PRB1_BASE);
1249 		init_unused_ring(dev_priv, PRB2_BASE);
1250 	}
1251 }
1252 
1253 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
1254 {
1255 	int ret;
1256 
1257 	dev_priv->gt.last_init_time = ktime_get();
1258 
1259 	/* Double layer security blanket, see i915_gem_init() */
1260 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
1261 
1262 	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
1263 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
1264 
1265 	if (IS_HASWELL(dev_priv))
1266 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
1267 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
1268 
1269 	/* Apply the GT workarounds... */
1270 	intel_gt_apply_workarounds(dev_priv);
1271 	/* ...and determine whether they are sticking. */
1272 	intel_gt_verify_workarounds(dev_priv, "init");
1273 
1274 	i915_gem_init_swizzling(dev_priv);
1275 
1276 	/*
1277 	 * At least 830 can leave some of the unused rings
1278 	 * "active" (ie. head != tail) after resume which
1279 	 * will prevent c3 entry. Makes sure all unused rings
1280 	 * are totally idle.
1281 	 */
1282 	init_unused_rings(dev_priv);
1283 
1284 	BUG_ON(!dev_priv->kernel_context);
1285 	ret = i915_terminally_wedged(dev_priv);
1286 	if (ret)
1287 		goto out;
1288 
1289 	ret = i915_ppgtt_init_hw(dev_priv);
1290 	if (ret) {
1291 		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
1292 		goto out;
1293 	}
1294 
1295 	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
1296 	if (ret) {
1297 		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
1298 		goto out;
1299 	}
1300 
1301 	/* We can't enable contexts until all firmware is loaded */
1302 	ret = intel_uc_init_hw(dev_priv);
1303 	if (ret) {
1304 		DRM_ERROR("Enabling uc failed (%d)\n", ret);
1305 		goto out;
1306 	}
1307 
1308 	intel_mocs_init_l3cc_table(dev_priv);
1309 
1310 	/* Only when the HW is re-initialised, can we replay the requests */
1311 	ret = intel_engines_resume(dev_priv);
1312 	if (ret)
1313 		goto cleanup_uc;
1314 
1315 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1316 
1317 	intel_engines_set_scheduler_caps(dev_priv);
1318 	return 0;
1319 
1320 cleanup_uc:
1321 	intel_uc_fini_hw(dev_priv);
1322 out:
1323 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1324 
1325 	return ret;
1326 }
1327 
1328 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
1329 {
1330 	struct intel_engine_cs *engine;
1331 	struct i915_gem_context *ctx;
1332 	struct i915_gem_engines *e;
1333 	enum intel_engine_id id;
1334 	int err = 0;
1335 
1336 	/*
1337 	 * As we reset the gpu during very early sanitisation, the current
1338 	 * register state on the GPU should reflect its defaults values.
1339 	 * We load a context onto the hw (with restore-inhibit), then switch
1340 	 * over to a second context to save that default register state. We
1341 	 * can then prime every new context with that state so they all start
1342 	 * from the same default HW values.
1343 	 */
1344 
1345 	ctx = i915_gem_context_create_kernel(i915, 0);
1346 	if (IS_ERR(ctx))
1347 		return PTR_ERR(ctx);
1348 
1349 	e = i915_gem_context_lock_engines(ctx);
1350 
1351 	for_each_engine(engine, i915, id) {
1352 		struct intel_context *ce = e->engines[id];
1353 		struct i915_request *rq;
1354 
1355 		rq = intel_context_create_request(ce);
1356 		if (IS_ERR(rq)) {
1357 			err = PTR_ERR(rq);
1358 			goto err_active;
1359 		}
1360 
1361 		err = 0;
1362 		if (rq->engine->init_context)
1363 			err = rq->engine->init_context(rq);
1364 
1365 		i915_request_add(rq);
1366 		if (err)
1367 			goto err_active;
1368 	}
1369 
1370 	/* Flush the default context image to memory, and enable powersaving. */
1371 	if (!i915_gem_load_power_context(i915)) {
1372 		err = -EIO;
1373 		goto err_active;
1374 	}
1375 
1376 	for_each_engine(engine, i915, id) {
1377 		struct intel_context *ce = e->engines[id];
1378 		struct i915_vma *state = ce->state;
1379 		void *vaddr;
1380 
1381 		if (!state)
1382 			continue;
1383 
1384 		GEM_BUG_ON(intel_context_is_pinned(ce));
1385 
1386 		/*
1387 		 * As we will hold a reference to the logical state, it will
1388 		 * not be torn down with the context, and importantly the
1389 		 * object will hold onto its vma (making it possible for a
1390 		 * stray GTT write to corrupt our defaults). Unmap the vma
1391 		 * from the GTT to prevent such accidents and reclaim the
1392 		 * space.
1393 		 */
1394 		err = i915_vma_unbind(state);
1395 		if (err)
1396 			goto err_active;
1397 
1398 		i915_gem_object_lock(state->obj);
1399 		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
1400 		i915_gem_object_unlock(state->obj);
1401 		if (err)
1402 			goto err_active;
1403 
1404 		engine->default_state = i915_gem_object_get(state->obj);
1405 		i915_gem_object_set_cache_coherency(engine->default_state,
1406 						    I915_CACHE_LLC);
1407 
1408 		/* Check we can acquire the image of the context state */
1409 		vaddr = i915_gem_object_pin_map(engine->default_state,
1410 						I915_MAP_FORCE_WB);
1411 		if (IS_ERR(vaddr)) {
1412 			err = PTR_ERR(vaddr);
1413 			goto err_active;
1414 		}
1415 
1416 		i915_gem_object_unpin_map(engine->default_state);
1417 	}
1418 
1419 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1420 		unsigned int found = intel_engines_has_context_isolation(i915);
1421 
1422 		/*
1423 		 * Make sure that classes with multiple engine instances all
1424 		 * share the same basic configuration.
1425 		 */
1426 		for_each_engine(engine, i915, id) {
1427 			unsigned int bit = BIT(engine->uabi_class);
1428 			unsigned int expected = engine->default_state ? bit : 0;
1429 
1430 			if ((found & bit) != expected) {
1431 				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
1432 					  engine->uabi_class, engine->name);
1433 			}
1434 		}
1435 	}
1436 
1437 out_ctx:
1438 	i915_gem_context_unlock_engines(ctx);
1439 	i915_gem_context_set_closed(ctx);
1440 	i915_gem_context_put(ctx);
1441 	return err;
1442 
1443 err_active:
1444 	/*
1445 	 * If we have to abandon now, we expect the engines to be idle
1446 	 * and ready to be torn-down. The quickest way we can accomplish
1447 	 * this is by declaring ourselves wedged.
1448 	 */
1449 	i915_gem_set_wedged(i915);
1450 	goto out_ctx;
1451 }
1452 
1453 static int
1454 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
1455 {
1456 	struct drm_i915_gem_object *obj;
1457 	struct i915_vma *vma;
1458 	int ret;
1459 
1460 	obj = i915_gem_object_create_stolen(i915, size);
1461 	if (!obj)
1462 		obj = i915_gem_object_create_internal(i915, size);
1463 	if (IS_ERR(obj)) {
1464 		DRM_ERROR("Failed to allocate scratch page\n");
1465 		return PTR_ERR(obj);
1466 	}
1467 
1468 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
1469 	if (IS_ERR(vma)) {
1470 		ret = PTR_ERR(vma);
1471 		goto err_unref;
1472 	}
1473 
1474 	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
1475 	if (ret)
1476 		goto err_unref;
1477 
1478 	i915->gt.scratch = vma;
1479 	return 0;
1480 
1481 err_unref:
1482 	i915_gem_object_put(obj);
1483 	return ret;
1484 }
1485 
1486 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
1487 {
1488 	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
1489 }
1490 
1491 static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
1492 {
1493 	struct intel_engine_cs *engine;
1494 	enum intel_engine_id id;
1495 	int err = 0;
1496 
1497 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1498 		return 0;
1499 
1500 	for_each_engine(engine, i915, id) {
1501 		if (intel_engine_verify_workarounds(engine, "load"))
1502 			err = -EIO;
1503 	}
1504 
1505 	return err;
1506 }
1507 
1508 int i915_gem_init(struct drm_i915_private *dev_priv)
1509 {
1510 	int ret;
1511 
1512 	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
1513 	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
1514 		mkwrite_device_info(dev_priv)->page_sizes =
1515 			I915_GTT_PAGE_SIZE_4K;
1516 
1517 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
1518 
1519 	i915_timelines_init(dev_priv);
1520 
1521 	ret = i915_gem_init_userptr(dev_priv);
1522 	if (ret)
1523 		return ret;
1524 
1525 	ret = intel_uc_init_misc(dev_priv);
1526 	if (ret)
1527 		return ret;
1528 
1529 	ret = intel_wopcm_init(&dev_priv->wopcm);
1530 	if (ret)
1531 		goto err_uc_misc;
1532 
1533 	/* This is just a security blanket to placate dragons.
1534 	 * On some systems, we very sporadically observe that the first TLBs
1535 	 * used by the CS may be stale, despite us poking the TLB reset. If
1536 	 * we hold the forcewake during initialisation these problems
1537 	 * just magically go away.
1538 	 */
1539 	mutex_lock(&dev_priv->drm.struct_mutex);
1540 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
1541 
1542 	ret = i915_gem_init_ggtt(dev_priv);
1543 	if (ret) {
1544 		GEM_BUG_ON(ret == -EIO);
1545 		goto err_unlock;
1546 	}
1547 
1548 	ret = i915_gem_init_scratch(dev_priv,
1549 				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
1550 	if (ret) {
1551 		GEM_BUG_ON(ret == -EIO);
1552 		goto err_ggtt;
1553 	}
1554 
1555 	ret = intel_engines_setup(dev_priv);
1556 	if (ret) {
1557 		GEM_BUG_ON(ret == -EIO);
1558 		goto err_unlock;
1559 	}
1560 
1561 	ret = i915_gem_contexts_init(dev_priv);
1562 	if (ret) {
1563 		GEM_BUG_ON(ret == -EIO);
1564 		goto err_scratch;
1565 	}
1566 
1567 	ret = intel_engines_init(dev_priv);
1568 	if (ret) {
1569 		GEM_BUG_ON(ret == -EIO);
1570 		goto err_context;
1571 	}
1572 
1573 	intel_init_gt_powersave(dev_priv);
1574 
1575 	ret = intel_uc_init(dev_priv);
1576 	if (ret)
1577 		goto err_pm;
1578 
1579 	ret = i915_gem_init_hw(dev_priv);
1580 	if (ret)
1581 		goto err_uc_init;
1582 
1583 	/*
1584 	 * Despite its name intel_init_clock_gating applies both display
1585 	 * clock gating workarounds; GT mmio workarounds and the occasional
1586 	 * GT power context workaround. Worse, sometimes it includes a context
1587 	 * register workaround which we need to apply before we record the
1588 	 * default HW state for all contexts.
1589 	 *
1590 	 * FIXME: break up the workarounds and apply them at the right time!
1591 	 */
1592 	intel_init_clock_gating(dev_priv);
1593 
1594 	ret = intel_engines_verify_workarounds(dev_priv);
1595 	if (ret)
1596 		goto err_init_hw;
1597 
1598 	ret = __intel_engines_record_defaults(dev_priv);
1599 	if (ret)
1600 		goto err_init_hw;
1601 
1602 	if (i915_inject_load_failure()) {
1603 		ret = -ENODEV;
1604 		goto err_init_hw;
1605 	}
1606 
1607 	if (i915_inject_load_failure()) {
1608 		ret = -EIO;
1609 		goto err_init_hw;
1610 	}
1611 
1612 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1613 	mutex_unlock(&dev_priv->drm.struct_mutex);
1614 
1615 	return 0;
1616 
1617 	/*
1618 	 * Unwinding is complicated by that we want to handle -EIO to mean
1619 	 * disable GPU submission but keep KMS alive. We want to mark the
1620 	 * HW as irrevisibly wedged, but keep enough state around that the
1621 	 * driver doesn't explode during runtime.
1622 	 */
1623 err_init_hw:
1624 	mutex_unlock(&dev_priv->drm.struct_mutex);
1625 
1626 	i915_gem_set_wedged(dev_priv);
1627 	i915_gem_suspend(dev_priv);
1628 	i915_gem_suspend_late(dev_priv);
1629 
1630 	i915_gem_drain_workqueue(dev_priv);
1631 
1632 	mutex_lock(&dev_priv->drm.struct_mutex);
1633 	intel_uc_fini_hw(dev_priv);
1634 err_uc_init:
1635 	intel_uc_fini(dev_priv);
1636 err_pm:
1637 	if (ret != -EIO) {
1638 		intel_cleanup_gt_powersave(dev_priv);
1639 		intel_engines_cleanup(dev_priv);
1640 	}
1641 err_context:
1642 	if (ret != -EIO)
1643 		i915_gem_contexts_fini(dev_priv);
1644 err_scratch:
1645 	i915_gem_fini_scratch(dev_priv);
1646 err_ggtt:
1647 err_unlock:
1648 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1649 	mutex_unlock(&dev_priv->drm.struct_mutex);
1650 
1651 err_uc_misc:
1652 	intel_uc_fini_misc(dev_priv);
1653 
1654 	if (ret != -EIO) {
1655 		i915_gem_cleanup_userptr(dev_priv);
1656 		i915_timelines_fini(dev_priv);
1657 	}
1658 
1659 	if (ret == -EIO) {
1660 		mutex_lock(&dev_priv->drm.struct_mutex);
1661 
1662 		/*
1663 		 * Allow engine initialisation to fail by marking the GPU as
1664 		 * wedged. But we only want to do this where the GPU is angry,
1665 		 * for all other failure, such as an allocation failure, bail.
1666 		 */
1667 		if (!i915_reset_failed(dev_priv)) {
1668 			i915_load_error(dev_priv,
1669 					"Failed to initialize GPU, declaring it wedged!\n");
1670 			i915_gem_set_wedged(dev_priv);
1671 		}
1672 
1673 		/* Minimal basic recovery for KMS */
1674 		ret = i915_ggtt_enable_hw(dev_priv);
1675 		i915_gem_restore_gtt_mappings(dev_priv);
1676 		i915_gem_restore_fences(dev_priv);
1677 		intel_init_clock_gating(dev_priv);
1678 
1679 		mutex_unlock(&dev_priv->drm.struct_mutex);
1680 	}
1681 
1682 	i915_gem_drain_freed_objects(dev_priv);
1683 	return ret;
1684 }
1685 
1686 void i915_gem_fini_hw(struct drm_i915_private *dev_priv)
1687 {
1688 	GEM_BUG_ON(dev_priv->gt.awake);
1689 
1690 	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
1691 
1692 	i915_gem_suspend_late(dev_priv);
1693 	intel_disable_gt_powersave(dev_priv);
1694 
1695 	/* Flush any outstanding unpin_work. */
1696 	i915_gem_drain_workqueue(dev_priv);
1697 
1698 	mutex_lock(&dev_priv->drm.struct_mutex);
1699 	intel_uc_fini_hw(dev_priv);
1700 	intel_uc_fini(dev_priv);
1701 	mutex_unlock(&dev_priv->drm.struct_mutex);
1702 
1703 	i915_gem_drain_freed_objects(dev_priv);
1704 }
1705 
1706 void i915_gem_fini(struct drm_i915_private *dev_priv)
1707 {
1708 	mutex_lock(&dev_priv->drm.struct_mutex);
1709 	intel_engines_cleanup(dev_priv);
1710 	i915_gem_contexts_fini(dev_priv);
1711 	i915_gem_fini_scratch(dev_priv);
1712 	mutex_unlock(&dev_priv->drm.struct_mutex);
1713 
1714 	intel_wa_list_free(&dev_priv->gt_wa_list);
1715 
1716 	intel_cleanup_gt_powersave(dev_priv);
1717 
1718 	intel_uc_fini_misc(dev_priv);
1719 	i915_gem_cleanup_userptr(dev_priv);
1720 	i915_timelines_fini(dev_priv);
1721 
1722 	i915_gem_drain_freed_objects(dev_priv);
1723 
1724 	WARN_ON(!list_empty(&dev_priv->contexts.list));
1725 }
1726 
1727 void i915_gem_init_mmio(struct drm_i915_private *i915)
1728 {
1729 	i915_gem_sanitize(i915);
1730 }
1731 
1732 static void i915_gem_init__mm(struct drm_i915_private *i915)
1733 {
1734 	spin_lock_init(&i915->mm.obj_lock);
1735 	spin_lock_init(&i915->mm.free_lock);
1736 
1737 	init_llist_head(&i915->mm.free_list);
1738 
1739 	INIT_LIST_HEAD(&i915->mm.purge_list);
1740 	INIT_LIST_HEAD(&i915->mm.shrink_list);
1741 
1742 	i915_gem_init__objects(i915);
1743 }
1744 
1745 int i915_gem_init_early(struct drm_i915_private *dev_priv)
1746 {
1747 	int err;
1748 
1749 	intel_gt_pm_init(dev_priv);
1750 
1751 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
1752 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
1753 	spin_lock_init(&dev_priv->gt.closed_lock);
1754 
1755 	i915_gem_init__mm(dev_priv);
1756 	i915_gem_init__pm(dev_priv);
1757 
1758 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
1759 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
1760 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
1761 	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
1762 
1763 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
1764 
1765 	spin_lock_init(&dev_priv->fb_tracking.lock);
1766 
1767 	err = i915_gemfs_init(dev_priv);
1768 	if (err)
1769 		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
1770 
1771 	return 0;
1772 }
1773 
1774 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
1775 {
1776 	i915_gem_drain_freed_objects(dev_priv);
1777 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
1778 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
1779 	WARN_ON(dev_priv->mm.shrink_count);
1780 
1781 	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
1782 
1783 	i915_gemfs_fini(dev_priv);
1784 }
1785 
1786 int i915_gem_freeze(struct drm_i915_private *dev_priv)
1787 {
1788 	/* Discard all purgeable objects, let userspace recover those as
1789 	 * required after resuming.
1790 	 */
1791 	i915_gem_shrink_all(dev_priv);
1792 
1793 	return 0;
1794 }
1795 
1796 int i915_gem_freeze_late(struct drm_i915_private *i915)
1797 {
1798 	struct drm_i915_gem_object *obj;
1799 	intel_wakeref_t wakeref;
1800 
1801 	/*
1802 	 * Called just before we write the hibernation image.
1803 	 *
1804 	 * We need to update the domain tracking to reflect that the CPU
1805 	 * will be accessing all the pages to create and restore from the
1806 	 * hibernation, and so upon restoration those pages will be in the
1807 	 * CPU domain.
1808 	 *
1809 	 * To make sure the hibernation image contains the latest state,
1810 	 * we update that state just before writing out the image.
1811 	 *
1812 	 * To try and reduce the hibernation image, we manually shrink
1813 	 * the objects as well, see i915_gem_freeze()
1814 	 */
1815 
1816 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1817 
1818 	i915_gem_shrink(i915, -1UL, NULL, ~0);
1819 	i915_gem_drain_freed_objects(i915);
1820 
1821 	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
1822 		i915_gem_object_lock(obj);
1823 		WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
1824 		i915_gem_object_unlock(obj);
1825 	}
1826 
1827 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1828 
1829 	return 0;
1830 }
1831 
1832 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
1833 {
1834 	struct drm_i915_file_private *file_priv = file->driver_priv;
1835 	struct i915_request *request;
1836 
1837 	/* Clean up our request list when the client is going away, so that
1838 	 * later retire_requests won't dereference our soon-to-be-gone
1839 	 * file_priv.
1840 	 */
1841 	spin_lock(&file_priv->mm.lock);
1842 	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
1843 		request->file_priv = NULL;
1844 	spin_unlock(&file_priv->mm.lock);
1845 }
1846 
1847 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
1848 {
1849 	struct drm_i915_file_private *file_priv;
1850 	int ret;
1851 
1852 	DRM_DEBUG("\n");
1853 
1854 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
1855 	if (!file_priv)
1856 		return -ENOMEM;
1857 
1858 	file->driver_priv = file_priv;
1859 	file_priv->dev_priv = i915;
1860 	file_priv->file = file;
1861 
1862 	spin_lock_init(&file_priv->mm.lock);
1863 	INIT_LIST_HEAD(&file_priv->mm.request_list);
1864 
1865 	file_priv->bsd_engine = -1;
1866 	file_priv->hang_timestamp = jiffies;
1867 
1868 	ret = i915_gem_context_open(i915, file);
1869 	if (ret)
1870 		kfree(file_priv);
1871 
1872 	return ret;
1873 }
1874 
1875 /**
1876  * i915_gem_track_fb - update frontbuffer tracking
1877  * @old: current GEM buffer for the frontbuffer slots
1878  * @new: new GEM buffer for the frontbuffer slots
1879  * @frontbuffer_bits: bitmask of frontbuffer slots
1880  *
1881  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
1882  * from @old and setting them in @new. Both @old and @new can be NULL.
1883  */
1884 void i915_gem_track_fb(struct drm_i915_gem_object *old,
1885 		       struct drm_i915_gem_object *new,
1886 		       unsigned frontbuffer_bits)
1887 {
1888 	/* Control of individual bits within the mask are guarded by
1889 	 * the owning plane->mutex, i.e. we can never see concurrent
1890 	 * manipulation of individual bits. But since the bitfield as a whole
1891 	 * is updated using RMW, we need to use atomics in order to update
1892 	 * the bits.
1893 	 */
1894 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
1895 		     BITS_PER_TYPE(atomic_t));
1896 
1897 	if (old) {
1898 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
1899 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
1900 	}
1901 
1902 	if (new) {
1903 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
1904 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
1905 	}
1906 }
1907 
1908 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1909 #include "selftests/mock_gem_device.c"
1910 #include "selftests/i915_gem.c"
1911 #endif
1912