xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_ggtt.c (revision f8e17c17)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/stop_machine.h>
7 
8 #include <asm/set_memory.h>
9 #include <asm/smp.h>
10 
11 #include "intel_gt.h"
12 #include "i915_drv.h"
13 #include "i915_scatterlist.h"
14 #include "i915_vgpu.h"
15 
16 #include "intel_gtt.h"
17 
18 static int
19 i915_get_ggtt_vma_pages(struct i915_vma *vma);
20 
21 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
22 				   unsigned long color,
23 				   u64 *start,
24 				   u64 *end)
25 {
26 	if (i915_node_color_differs(node, color))
27 		*start += I915_GTT_PAGE_SIZE;
28 
29 	/*
30 	 * Also leave a space between the unallocated reserved node after the
31 	 * GTT and any objects within the GTT, i.e. we use the color adjustment
32 	 * to insert a guard page to prevent prefetches crossing over the
33 	 * GTT boundary.
34 	 */
35 	node = list_next_entry(node, node_list);
36 	if (node->color != color)
37 		*end -= I915_GTT_PAGE_SIZE;
38 }
39 
40 static int ggtt_init_hw(struct i915_ggtt *ggtt)
41 {
42 	struct drm_i915_private *i915 = ggtt->vm.i915;
43 
44 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
45 
46 	ggtt->vm.is_ggtt = true;
47 
48 	/* Only VLV supports read-only GGTT mappings */
49 	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
50 
51 	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
52 		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
53 
54 	if (ggtt->mappable_end) {
55 		if (!io_mapping_init_wc(&ggtt->iomap,
56 					ggtt->gmadr.start,
57 					ggtt->mappable_end)) {
58 			ggtt->vm.cleanup(&ggtt->vm);
59 			return -EIO;
60 		}
61 
62 		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
63 					      ggtt->mappable_end);
64 	}
65 
66 	i915_ggtt_init_fences(ggtt);
67 
68 	return 0;
69 }
70 
71 /**
72  * i915_ggtt_init_hw - Initialize GGTT hardware
73  * @i915: i915 device
74  */
75 int i915_ggtt_init_hw(struct drm_i915_private *i915)
76 {
77 	int ret;
78 
79 	stash_init(&i915->mm.wc_stash);
80 
81 	/*
82 	 * Note that we use page colouring to enforce a guard page at the
83 	 * end of the address space. This is required as the CS may prefetch
84 	 * beyond the end of the batch buffer, across the page boundary,
85 	 * and beyond the end of the GTT if we do not provide a guard.
86 	 */
87 	ret = ggtt_init_hw(&i915->ggtt);
88 	if (ret)
89 		return ret;
90 
91 	return 0;
92 }
93 
94 /*
95  * Certain Gen5 chipsets require idling the GPU before
96  * unmapping anything from the GTT when VT-d is enabled.
97  */
98 static bool needs_idle_maps(struct drm_i915_private *i915)
99 {
100 	/*
101 	 * Query intel_iommu to see if we need the workaround. Presumably that
102 	 * was loaded first.
103 	 */
104 	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
105 }
106 
107 static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
108 {
109 	struct drm_i915_private *i915 = ggtt->vm.i915;
110 
111 	/*
112 	 * Don't bother messing with faults pre GEN6 as we have little
113 	 * documentation supporting that it's a good idea.
114 	 */
115 	if (INTEL_GEN(i915) < 6)
116 		return;
117 
118 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
119 
120 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
121 
122 	ggtt->invalidate(ggtt);
123 }
124 
125 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
126 {
127 	ggtt_suspend_mappings(&i915->ggtt);
128 }
129 
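/*
 * Flush the GGTT TLB: write GFX_FLSH_CNTL_EN and then use the posting read,
 * both under the uncore lock, so the write is known to have reached the
 * hardware before we return.
 */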
130 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
131 {
132 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
133 
134 	spin_lock_irq(&uncore->lock);
135 	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
136 	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
137 	spin_unlock_irq(&uncore->lock);
138 }
139 
140 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
141 {
142 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
143 
144 	/*
145 	 * Note that as an uncached mmio write, this will flush the
146 	 * WCB of the writes into the GGTT before it triggers the invalidate.
147 	 */
148 	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
149 }
150 
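/*
 * With GuC in use, a GGTT update must also invalidate the GuC's own TLB;
 * the register doing so moved on gen12 (GEN12_GUC_TLB_INV_CR vs GEN8_GTCR).
 */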
151 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
152 {
153 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
154 	struct drm_i915_private *i915 = ggtt->vm.i915;
155 
156 	gen8_ggtt_invalidate(ggtt);
157 
158 	if (INTEL_GEN(i915) >= 12)
159 		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
160 				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
161 	else
162 		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
163 }
164 
165 static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
166 {
167 	intel_gtt_chipset_flush();
168 }
169 
170 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
171 {
172 	writeq(pte, addr);
173 }
174 
175 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
176 				  dma_addr_t addr,
177 				  u64 offset,
178 				  enum i915_cache_level level,
179 				  u32 unused)
180 {
181 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
182 	gen8_pte_t __iomem *pte =
183 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
184 
185 	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
186 
187 	ggtt->invalidate(ggtt);
188 }
189 
190 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
191 				     struct i915_vma *vma,
192 				     enum i915_cache_level level,
193 				     u32 flags)
194 {
195 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
196 	struct sgt_iter sgt_iter;
197 	gen8_pte_t __iomem *gtt_entries;
198 	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
199 	dma_addr_t addr;
200 
201 	/*
202 	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
203 	 * not to allow the user to override access to a read only page.
204 	 */
205 
206 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
207 	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
208 	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
209 		gen8_set_pte(gtt_entries++, pte_encode | addr);
210 
211 	/*
212 	 * We want to flush the TLBs only after we're certain all the PTE
213 	 * updates have finished.
214 	 */
215 	ggtt->invalidate(ggtt);
216 }
217 
218 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
219 				  dma_addr_t addr,
220 				  u64 offset,
221 				  enum i915_cache_level level,
222 				  u32 flags)
223 {
224 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
225 	gen6_pte_t __iomem *pte =
226 		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
227 
228 	iowrite32(vm->pte_encode(addr, level, flags), pte);
229 
230 	ggtt->invalidate(ggtt);
231 }
232 
233 /*
234  * Binds an object into the global gtt with the specified cache level.
235  * The object will be accessible to the GPU via commands whose operands
236  * reference offsets within the global GTT as well as accessible by the GPU
237  * through the GMADR mapped BAR (i915->mm.gtt->gtt).
238  */
239 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
240 				     struct i915_vma *vma,
241 				     enum i915_cache_level level,
242 				     u32 flags)
243 {
244 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
245 	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
246 	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
247 	struct sgt_iter iter;
248 	dma_addr_t addr;
249 
250 	for_each_sgt_daddr(addr, iter, vma->pages)
251 		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
252 
253 	/*
254 	 * We want to flush the TLBs only after we're certain all the PTE
255 	 * updates have finished.
256 	 */
257 	ggtt->invalidate(ggtt);
258 }
259 
260 static void nop_clear_range(struct i915_address_space *vm,
261 			    u64 start, u64 length)
262 {
263 }
264 
265 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
266 				  u64 start, u64 length)
267 {
268 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
269 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
270 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
271 	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
272 	gen8_pte_t __iomem *gtt_base =
273 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
274 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
275 	int i;
276 
277 	if (WARN(num_entries > max_entries,
278 		 "First entry = %d; Num entries = %d (max=%d)\n",
279 		 first_entry, num_entries, max_entries))
280 		num_entries = max_entries;
281 
282 	for (i = 0; i < num_entries; i++)
283 		gen8_set_pte(&gtt_base[i], scratch_pte);
284 }
285 
286 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
287 {
288 	/*
289 	 * Make sure the internal GAM fifo has been cleared of all GTT
290 	 * writes before exiting stop_machine(). This guarantees that
291 	 * any aperture accesses waiting to start in another process
292 	 * cannot back up behind the GTT writes causing a hang.
293 	 * The register can be any arbitrary GAM register.
294 	 */
295 	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
296 }
297 
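/*
 * VT-d workaround (see bxt_vtd_ggtt_wa() above): the *__BKL variants bundle
 * their arguments into small structs and run the real GGTT update inside
 * stop_machine(), so no other CPU can touch the aperture while the PTEs are
 * being rewritten.
 */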
298 struct insert_page {
299 	struct i915_address_space *vm;
300 	dma_addr_t addr;
301 	u64 offset;
302 	enum i915_cache_level level;
303 };
304 
305 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
306 {
307 	struct insert_page *arg = _arg;
308 
309 	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
310 	bxt_vtd_ggtt_wa(arg->vm);
311 
312 	return 0;
313 }
314 
315 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
316 					  dma_addr_t addr,
317 					  u64 offset,
318 					  enum i915_cache_level level,
319 					  u32 unused)
320 {
321 	struct insert_page arg = { vm, addr, offset, level };
322 
323 	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
324 }
325 
326 struct insert_entries {
327 	struct i915_address_space *vm;
328 	struct i915_vma *vma;
329 	enum i915_cache_level level;
330 	u32 flags;
331 };
332 
333 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
334 {
335 	struct insert_entries *arg = _arg;
336 
337 	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
338 	bxt_vtd_ggtt_wa(arg->vm);
339 
340 	return 0;
341 }
342 
343 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
344 					     struct i915_vma *vma,
345 					     enum i915_cache_level level,
346 					     u32 flags)
347 {
348 	struct insert_entries arg = { vm, vma, level, flags };
349 
350 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
351 }
352 
353 struct clear_range {
354 	struct i915_address_space *vm;
355 	u64 start;
356 	u64 length;
357 };
358 
359 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
360 {
361 	struct clear_range *arg = _arg;
362 
363 	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
364 	bxt_vtd_ggtt_wa(arg->vm);
365 
366 	return 0;
367 }
368 
369 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
370 					  u64 start,
371 					  u64 length)
372 {
373 	struct clear_range arg = { vm, start, length };
374 
375 	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
376 }
377 
378 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
379 				  u64 start, u64 length)
380 {
381 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
382 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
383 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
384 	gen6_pte_t scratch_pte, __iomem *gtt_base =
385 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
386 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
387 	int i;
388 
389 	if (WARN(num_entries > max_entries,
390 		 "First entry = %d; Num entries = %d (max=%d)\n",
391 		 first_entry, num_entries, max_entries))
392 		num_entries = max_entries;
393 
394 	scratch_pte = vm->scratch[0].encode;
395 	for (i = 0; i < num_entries; i++)
396 		iowrite32(scratch_pte, &gtt_base[i]);
397 }
398 
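/*
 * Pre-gen6 path: the GGTT is programmed through the legacy intel-gtt/AGP
 * layer (intel_gtt_*) rather than by writing PTEs directly through the GSM.
 */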
399 static void i915_ggtt_insert_page(struct i915_address_space *vm,
400 				  dma_addr_t addr,
401 				  u64 offset,
402 				  enum i915_cache_level cache_level,
403 				  u32 unused)
404 {
405 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
406 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
407 
408 	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
409 }
410 
411 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
412 				     struct i915_vma *vma,
413 				     enum i915_cache_level cache_level,
414 				     u32 unused)
415 {
416 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
417 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
418 
419 	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
420 				    flags);
421 }
422 
423 static void i915_ggtt_clear_range(struct i915_address_space *vm,
424 				  u64 start, u64 length)
425 {
426 	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
427 }
428 
429 static int ggtt_bind_vma(struct i915_vma *vma,
430 			 enum i915_cache_level cache_level,
431 			 u32 flags)
432 {
433 	struct drm_i915_gem_object *obj = vma->obj;
434 	u32 pte_flags;
435 
436 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
437 	pte_flags = 0;
438 	if (i915_gem_object_is_readonly(obj))
439 		pte_flags |= PTE_READ_ONLY;
440 
441 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
442 
443 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
444 
445 	/*
446 	 * Without aliasing PPGTT there's no difference between
447 	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
448 	 * upgrade to both bound if we bind either to avoid double-binding.
449 	 */
450 	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
451 
452 	return 0;
453 }
454 
455 static void ggtt_unbind_vma(struct i915_vma *vma)
456 {
457 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
458 }
459 
460 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
461 {
462 	u64 size;
463 	int ret;
464 
465 	if (!USES_GUC(ggtt->vm.i915))
466 		return 0;
467 
468 	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
469 	size = ggtt->vm.total - GUC_GGTT_TOP;
470 
471 	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
472 				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
473 				   PIN_NOEVICT);
474 	if (ret)
475 		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
476 
477 	return ret;
478 }
479 
480 static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
481 {
482 	if (drm_mm_node_allocated(&ggtt->uc_fw))
483 		drm_mm_remove_node(&ggtt->uc_fw);
484 }
485 
486 static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
487 {
488 	ggtt_release_guc_top(ggtt);
489 	if (drm_mm_node_allocated(&ggtt->error_capture))
490 		drm_mm_remove_node(&ggtt->error_capture);
491 	mutex_destroy(&ggtt->error_mutex);
492 }
493 
494 static int init_ggtt(struct i915_ggtt *ggtt)
495 {
496 	/*
497 	 * Let GEM manage all of the aperture.
498 	 *
499 	 * However, leave one page at the end still bound to the scratch page.
500 	 * There are a number of places where the hardware apparently prefetches
501 	 * past the end of the object, and we've seen multiple hangs with the
502 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
503 	 * aperture.  One page should be enough to keep any prefetching inside
504 	 * of the aperture.
505 	 */
506 	unsigned long hole_start, hole_end;
507 	struct drm_mm_node *entry;
508 	int ret;
509 
510 	/*
511 	 * GuC requires all resources that we're sharing with it to be placed in
512 	 * non-WOPCM memory. If GuC is not present or not in use we still need a
513 	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
514 	 * why.
515 	 */
516 	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
517 			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
518 
519 	ret = intel_vgt_balloon(ggtt);
520 	if (ret)
521 		return ret;
522 
523 	mutex_init(&ggtt->error_mutex);
524 	if (ggtt->mappable_end) {
525 		/* Reserve a mappable slot for our lockless error capture */
526 		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
527 						  &ggtt->error_capture,
528 						  PAGE_SIZE, 0,
529 						  I915_COLOR_UNEVICTABLE,
530 						  0, ggtt->mappable_end,
531 						  DRM_MM_INSERT_LOW);
532 		if (ret)
533 			return ret;
534 	}
535 
536 	/*
537 	 * The upper portion of the GuC address space has a sizeable hole
538 	 * (several MB) that is inaccessible by GuC. Reserve this range within
539 	 * GGTT as it can comfortably hold GuC/HuC firmware images.
540 	 */
541 	ret = ggtt_reserve_guc_top(ggtt);
542 	if (ret)
543 		goto err;
544 
545 	/* Clear any non-preallocated blocks */
546 	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
547 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
548 			      hole_start, hole_end);
549 		ggtt->vm.clear_range(&ggtt->vm, hole_start,
550 				     hole_end - hole_start);
551 	}
552 
553 	/* And finally clear the reserved guard page */
554 	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
555 
556 	return 0;
557 
558 err:
559 	cleanup_init_ggtt(ggtt);
560 	return ret;
561 }
562 
563 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
564 				 enum i915_cache_level cache_level,
565 				 u32 flags)
566 {
567 	u32 pte_flags;
568 	int ret;
569 
570 	/* Currently applicable only to VLV */
571 	pte_flags = 0;
572 	if (i915_gem_object_is_readonly(vma->obj))
573 		pte_flags |= PTE_READ_ONLY;
574 
575 	if (flags & I915_VMA_LOCAL_BIND) {
576 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
577 
578 		if (flags & I915_VMA_ALLOC) {
579 			ret = alias->vm.allocate_va_range(&alias->vm,
580 							  vma->node.start,
581 							  vma->size);
582 			if (ret)
583 				return ret;
584 
585 			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
586 		}
587 
588 		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
589 				     __i915_vma_flags(vma)));
590 		alias->vm.insert_entries(&alias->vm, vma,
591 					 cache_level, pte_flags);
592 	}
593 
594 	if (flags & I915_VMA_GLOBAL_BIND)
595 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
596 
597 	return 0;
598 }
599 
600 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
601 {
602 	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
603 		struct i915_address_space *vm = vma->vm;
604 
605 		vm->clear_range(vm, vma->node.start, vma->size);
606 	}
607 
608 	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
609 		struct i915_address_space *vm =
610 			&i915_vm_to_ggtt(vma->vm)->alias->vm;
611 
612 		vm->clear_range(vm, vma->node.start, vma->size);
613 	}
614 }
615 
616 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
617 {
618 	struct i915_ppgtt *ppgtt;
619 	int err;
620 
621 	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
622 	if (IS_ERR(ppgtt))
623 		return PTR_ERR(ppgtt);
624 
625 	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
626 		err = -ENODEV;
627 		goto err_ppgtt;
628 	}
629 
630 	/*
631 	 * Note we only pre-allocate as far as the end of the global
632 	 * GTT. On 48b / 4-level page-tables, the difference is very,
633 	 * very significant! We have to preallocate as GVT/vgpu does
634 	 * not like the page directory disappearing.
635 	 */
636 	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
637 	if (err)
638 		goto err_ppgtt;
639 
640 	ggtt->alias = ppgtt;
641 	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
642 
643 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
644 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
645 
646 	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
647 	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
648 
649 	return 0;
650 
651 err_ppgtt:
652 	i915_vm_put(&ppgtt->vm);
653 	return err;
654 }
655 
656 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
657 {
658 	struct i915_ppgtt *ppgtt;
659 
660 	ppgtt = fetch_and_zero(&ggtt->alias);
661 	if (!ppgtt)
662 		return;
663 
664 	i915_vm_put(&ppgtt->vm);
665 
666 	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
667 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
668 }
669 
670 int i915_init_ggtt(struct drm_i915_private *i915)
671 {
672 	int ret;
673 
674 	ret = init_ggtt(&i915->ggtt);
675 	if (ret)
676 		return ret;
677 
678 	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
679 		ret = init_aliasing_ppgtt(&i915->ggtt);
680 		if (ret)
681 			cleanup_init_ggtt(&i915->ggtt);
682 	}
683 
684 	return 0;
685 }
686 
687 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
688 {
689 	struct i915_vma *vma, *vn;
690 
691 	atomic_set(&ggtt->vm.open, 0);
692 
693 	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
694 	flush_workqueue(ggtt->vm.i915->wq);
695 
696 	mutex_lock(&ggtt->vm.mutex);
697 
698 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
699 		WARN_ON(__i915_vma_unbind(vma));
700 
701 	if (drm_mm_node_allocated(&ggtt->error_capture))
702 		drm_mm_remove_node(&ggtt->error_capture);
703 	mutex_destroy(&ggtt->error_mutex);
704 
705 	ggtt_release_guc_top(ggtt);
706 	intel_vgt_deballoon(ggtt);
707 
708 	ggtt->vm.cleanup(&ggtt->vm);
709 
710 	mutex_unlock(&ggtt->vm.mutex);
711 	i915_address_space_fini(&ggtt->vm);
712 
713 	arch_phys_wc_del(ggtt->mtrr);
714 
715 	if (ggtt->iomap.size)
716 		io_mapping_fini(&ggtt->iomap);
717 }
718 
719 /**
720  * i915_ggtt_driver_release - Clean up GGTT hardware initialization
721  * @i915: i915 device
722  */
723 void i915_ggtt_driver_release(struct drm_i915_private *i915)
724 {
725 	struct pagevec *pvec;
726 
727 	fini_aliasing_ppgtt(&i915->ggtt);
728 
729 	ggtt_cleanup_hw(&i915->ggtt);
730 
731 	pvec = &i915->mm.wc_stash.pvec;
732 	if (pvec->nr) {
733 		set_pages_array_wb(pvec->pages, pvec->nr);
734 		__pagevec_release(pvec);
735 	}
736 }
737 
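/*
 * The GGMS field of the GMCH control word gives the size of the GTT itself
 * in MiB (the value is simply shifted up by 20 here). With 4-byte gen6 PTEs
 * each mapping a 4KiB page, a 2MiB GTT, for example, translates into a 2GiB
 * GGTT address space in gen6_gmch_probe().
 */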
738 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
739 {
740 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
741 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
742 	return snb_gmch_ctl << 20;
743 }
744 
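/*
 * On gen8+ the GGMS field encodes the GTT size as a power of two: value n
 * means 2^n MiB of GTT. With 8-byte gen8 PTEs, n = 3 gives an 8MiB GTT and
 * hence a 4GiB GGTT address space; the CONFIG_X86_32 clamp to 4MiB below
 * corresponds to the 2GiB limit noted in the comment.
 */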
745 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
746 {
747 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
748 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
749 	if (bdw_gmch_ctl)
750 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
751 
752 #ifdef CONFIG_X86_32
753 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
754 	if (bdw_gmch_ctl > 4)
755 		bdw_gmch_ctl = 4;
756 #endif
757 
758 	return bdw_gmch_ctl << 20;
759 }
760 
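/* CHV encodes the GTT size as an exponent too: value n means 2^n MiB (0 means none). */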
761 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
762 {
763 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
764 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
765 
766 	if (gmch_ctrl)
767 		return 1 << (20 + gmch_ctrl);
768 
769 	return 0;
770 }
771 
772 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
773 {
774 	struct drm_i915_private *i915 = ggtt->vm.i915;
775 	struct pci_dev *pdev = i915->drm.pdev;
776 	phys_addr_t phys_addr;
777 	int ret;
778 
779 	/* For Modern GENs the PTEs and register space are split in the BAR */
780 	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
781 
782 	/*
783 	 * On BXT+/CNL+ writes larger than 64 bits to the GTT pagetable range
784 	 * will be dropped. For WC mappings in general we have 64 byte burst
785 	 * writes when the WC buffer is flushed, so we can't use it, but have to
786 	 * resort to an uncached mapping. The WC issue is easily caught by the
787 	 * readback check when writing GTT PTE entries.
788 	 */
789 	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
790 		ggtt->gsm = ioremap(phys_addr, size);
791 	else
792 		ggtt->gsm = ioremap_wc(phys_addr, size);
793 	if (!ggtt->gsm) {
794 		DRM_ERROR("Failed to map the ggtt page table\n");
795 		return -ENOMEM;
796 	}
797 
798 	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
799 	if (ret) {
800 		DRM_ERROR("Scratch setup failed\n");
801 		/* iounmap will also get called at remove, but meh */
802 		iounmap(ggtt->gsm);
803 		return ret;
804 	}
805 
806 	ggtt->vm.scratch[0].encode =
807 		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
808 				    I915_CACHE_NONE, 0);
809 
810 	return 0;
811 }
812 
813 int ggtt_set_pages(struct i915_vma *vma)
814 {
815 	int ret;
816 
817 	GEM_BUG_ON(vma->pages);
818 
819 	ret = i915_get_ggtt_vma_pages(vma);
820 	if (ret)
821 		return ret;
822 
823 	vma->page_sizes = vma->obj->mm.page_sizes;
824 
825 	return 0;
826 }
827 
828 static void gen6_gmch_remove(struct i915_address_space *vm)
829 {
830 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
831 
832 	iounmap(ggtt->gsm);
833 	cleanup_scratch_page(vm);
834 }
835 
836 static struct resource pci_resource(struct pci_dev *pdev, int bar)
837 {
838 	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
839 					       pci_resource_len(pdev, bar));
840 }
841 
842 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
843 {
844 	struct drm_i915_private *i915 = ggtt->vm.i915;
845 	struct pci_dev *pdev = i915->drm.pdev;
846 	unsigned int size;
847 	u16 snb_gmch_ctl;
848 	int err;
849 
850 	/* TODO: We're not aware of mappable constraints on gen8 yet */
851 	if (!IS_DGFX(i915)) {
852 		ggtt->gmadr = pci_resource(pdev, 2);
853 		ggtt->mappable_end = resource_size(&ggtt->gmadr);
854 	}
855 
856 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
857 	if (!err)
858 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
859 	if (err)
860 		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
861 
862 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
863 	if (IS_CHERRYVIEW(i915))
864 		size = chv_get_total_gtt_size(snb_gmch_ctl);
865 	else
866 		size = gen8_get_total_gtt_size(snb_gmch_ctl);
867 
868 	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
869 	ggtt->vm.cleanup = gen6_gmch_remove;
870 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
871 	ggtt->vm.clear_range = nop_clear_range;
872 	if (intel_scanout_needs_vtd_wa(i915))
873 		ggtt->vm.clear_range = gen8_ggtt_clear_range;
874 
875 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
876 
877 	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
878 	if (intel_ggtt_update_needs_vtd_wa(i915) ||
879 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
880 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
881 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
882 		if (ggtt->vm.clear_range != nop_clear_range)
883 			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
884 	}
885 
886 	ggtt->invalidate = gen8_ggtt_invalidate;
887 
888 	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
889 	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
890 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
891 	ggtt->vm.vma_ops.clear_pages = clear_pages;
892 
893 	ggtt->vm.pte_encode = gen8_pte_encode;
894 
895 	setup_private_pat(ggtt->vm.gt->uncore);
896 
897 	return ggtt_probe_common(ggtt, size);
898 }
899 
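/*
 * Per-platform gen6/gen7 GGTT PTE encodings: translate the requested
 * i915_cache_level (and the PTE_READ_ONLY flag, where supported) into the
 * cacheability/validity bits understood by that particular GPU.
 */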
900 static u64 snb_pte_encode(dma_addr_t addr,
901 			  enum i915_cache_level level,
902 			  u32 flags)
903 {
904 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
905 
906 	switch (level) {
907 	case I915_CACHE_L3_LLC:
908 	case I915_CACHE_LLC:
909 		pte |= GEN6_PTE_CACHE_LLC;
910 		break;
911 	case I915_CACHE_NONE:
912 		pte |= GEN6_PTE_UNCACHED;
913 		break;
914 	default:
915 		MISSING_CASE(level);
916 	}
917 
918 	return pte;
919 }
920 
921 static u64 ivb_pte_encode(dma_addr_t addr,
922 			  enum i915_cache_level level,
923 			  u32 flags)
924 {
925 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
926 
927 	switch (level) {
928 	case I915_CACHE_L3_LLC:
929 		pte |= GEN7_PTE_CACHE_L3_LLC;
930 		break;
931 	case I915_CACHE_LLC:
932 		pte |= GEN6_PTE_CACHE_LLC;
933 		break;
934 	case I915_CACHE_NONE:
935 		pte |= GEN6_PTE_UNCACHED;
936 		break;
937 	default:
938 		MISSING_CASE(level);
939 	}
940 
941 	return pte;
942 }
943 
944 static u64 byt_pte_encode(dma_addr_t addr,
945 			  enum i915_cache_level level,
946 			  u32 flags)
947 {
948 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
949 
950 	if (!(flags & PTE_READ_ONLY))
951 		pte |= BYT_PTE_WRITEABLE;
952 
953 	if (level != I915_CACHE_NONE)
954 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
955 
956 	return pte;
957 }
958 
959 static u64 hsw_pte_encode(dma_addr_t addr,
960 			  enum i915_cache_level level,
961 			  u32 flags)
962 {
963 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
964 
965 	if (level != I915_CACHE_NONE)
966 		pte |= HSW_WB_LLC_AGE3;
967 
968 	return pte;
969 }
970 
971 static u64 iris_pte_encode(dma_addr_t addr,
972 			   enum i915_cache_level level,
973 			   u32 flags)
974 {
975 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
976 
977 	switch (level) {
978 	case I915_CACHE_NONE:
979 		break;
980 	case I915_CACHE_WT:
981 		pte |= HSW_WT_ELLC_LLC_AGE3;
982 		break;
983 	default:
984 		pte |= HSW_WB_ELLC_LLC_AGE3;
985 		break;
986 	}
987 
988 	return pte;
989 }
990 
991 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
992 {
993 	struct drm_i915_private *i915 = ggtt->vm.i915;
994 	struct pci_dev *pdev = i915->drm.pdev;
995 	unsigned int size;
996 	u16 snb_gmch_ctl;
997 	int err;
998 
999 	ggtt->gmadr = pci_resource(pdev, 2);
1000 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
1001 
1002 	/*
1003 	 * 64/512MB is the current min/max we actually know of, but this is
1004 	 * just a coarse sanity check.
1005 	 */
1006 	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
1007 		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
1008 		return -ENXIO;
1009 	}
1010 
1011 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
1012 	if (!err)
1013 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
1014 	if (err)
1015 		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
1016 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1017 
1018 	size = gen6_get_total_gtt_size(snb_gmch_ctl);
1019 	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1020 
1021 	ggtt->vm.clear_range = nop_clear_range;
1022 	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
1023 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
1024 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
1025 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1026 	ggtt->vm.cleanup = gen6_gmch_remove;
1027 
1028 	ggtt->invalidate = gen6_ggtt_invalidate;
1029 
1030 	if (HAS_EDRAM(i915))
1031 		ggtt->vm.pte_encode = iris_pte_encode;
1032 	else if (IS_HASWELL(i915))
1033 		ggtt->vm.pte_encode = hsw_pte_encode;
1034 	else if (IS_VALLEYVIEW(i915))
1035 		ggtt->vm.pte_encode = byt_pte_encode;
1036 	else if (INTEL_GEN(i915) >= 7)
1037 		ggtt->vm.pte_encode = ivb_pte_encode;
1038 	else
1039 		ggtt->vm.pte_encode = snb_pte_encode;
1040 
1041 	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1042 	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1043 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1044 	ggtt->vm.vma_ops.clear_pages = clear_pages;
1045 
1046 	return ggtt_probe_common(ggtt, size);
1047 }
1048 
1049 static void i915_gmch_remove(struct i915_address_space *vm)
1050 {
1051 	intel_gmch_remove();
1052 }
1053 
1054 static int i915_gmch_probe(struct i915_ggtt *ggtt)
1055 {
1056 	struct drm_i915_private *i915 = ggtt->vm.i915;
1057 	phys_addr_t gmadr_base;
1058 	int ret;
1059 
1060 	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
1061 	if (!ret) {
1062 		DRM_ERROR("failed to set up gmch\n");
1063 		return -EIO;
1064 	}
1065 
1066 	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
1067 
1068 	ggtt->gmadr =
1069 		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
1070 
1071 	ggtt->do_idle_maps = needs_idle_maps(i915);
1072 	ggtt->vm.insert_page = i915_ggtt_insert_page;
1073 	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
1074 	ggtt->vm.clear_range = i915_ggtt_clear_range;
1075 	ggtt->vm.cleanup = i915_gmch_remove;
1076 
1077 	ggtt->invalidate = gmch_ggtt_invalidate;
1078 
1079 	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1080 	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1081 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1082 	ggtt->vm.vma_ops.clear_pages = clear_pages;
1083 
1084 	if (unlikely(ggtt->do_idle_maps))
1085 		dev_notice(i915->drm.dev,
1086 			   "Applying Ironlake quirks for intel_iommu\n");
1087 
1088 	return 0;
1089 }
1090 
1091 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1092 {
1093 	struct drm_i915_private *i915 = gt->i915;
1094 	int ret;
1095 
1096 	ggtt->vm.gt = gt;
1097 	ggtt->vm.i915 = i915;
1098 	ggtt->vm.dma = &i915->drm.pdev->dev;
1099 
1100 	if (INTEL_GEN(i915) <= 5)
1101 		ret = i915_gmch_probe(ggtt);
1102 	else if (INTEL_GEN(i915) < 8)
1103 		ret = gen6_gmch_probe(ggtt);
1104 	else
1105 		ret = gen8_gmch_probe(ggtt);
1106 	if (ret)
1107 		return ret;
1108 
1109 	if ((ggtt->vm.total - 1) >> 32) {
1110 		DRM_ERROR("We never expected a Global GTT with more than 32 bits"
1111 			  " of address space! Found %lldM!\n",
1112 			  ggtt->vm.total >> 20);
1113 		ggtt->vm.total = 1ULL << 32;
1114 		ggtt->mappable_end =
1115 			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1116 	}
1117 
1118 	if (ggtt->mappable_end > ggtt->vm.total) {
1119 		DRM_ERROR("mappable aperture extends past end of GGTT,"
1120 			  " aperture=%pa, total=%llx\n",
1121 			  &ggtt->mappable_end, ggtt->vm.total);
1122 		ggtt->mappable_end = ggtt->vm.total;
1123 	}
1124 
1125 	/* GMADR is the PCI mmio aperture into the global GTT. */
1126 	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
1127 	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
1128 	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
1129 			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
1130 
1131 	return 0;
1132 }
1133 
1134 /**
1135  * i915_ggtt_probe_hw - Probe GGTT hardware location
1136  * @i915: i915 device
1137  */
1138 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1139 {
1140 	int ret;
1141 
1142 	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
1143 	if (ret)
1144 		return ret;
1145 
1146 	if (intel_vtd_active())
1147 		dev_info(i915->drm.dev, "VT-d active for gfx access\n");
1148 
1149 	return 0;
1150 }
1151 
1152 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1153 {
1154 	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
1155 		return -EIO;
1156 
1157 	return 0;
1158 }
1159 
1160 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
1161 {
1162 	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
1163 
1164 	ggtt->invalidate = guc_ggtt_invalidate;
1165 
1166 	ggtt->invalidate(ggtt);
1167 }
1168 
1169 void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
1170 {
1171 	/* XXX Temporary pardon for error unload */
1172 	if (ggtt->invalidate == gen8_ggtt_invalidate)
1173 		return;
1174 
1175 	/* We should only be called after i915_ggtt_enable_guc() */
1176 	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
1177 
1178 	ggtt->invalidate = gen8_ggtt_invalidate;
1179 
1180 	ggtt->invalidate(ggtt);
1181 }
1182 
1183 static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
1184 {
1185 	struct i915_vma *vma;
1186 	bool flush = false;
1187 	int open;
1188 
1189 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
1190 
1191 	mutex_lock(&ggtt->vm.mutex);
1192 
1193 	/* First fill our portion of the GTT with scratch pages */
1194 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
1195 
1196 	/* Skip rewriting PTE on VMA unbind. */
1197 	open = atomic_xchg(&ggtt->vm.open, 0);
1198 
1199 	/* clflush objects bound into the GGTT and rebind them. */
1200 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
1201 		struct drm_i915_gem_object *obj = vma->obj;
1202 
1203 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
1204 			continue;
1205 
1206 		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
1207 		WARN_ON(i915_vma_bind(vma,
1208 				      obj ? obj->cache_level : 0,
1209 				      PIN_GLOBAL, NULL));
1210 		if (obj) { /* only used during resume => exclusive access */
1211 			flush |= fetch_and_zero(&obj->write_domain);
1212 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
1213 		}
1214 	}
1215 
1216 	atomic_set(&ggtt->vm.open, open);
1217 	ggtt->invalidate(ggtt);
1218 
1219 	mutex_unlock(&ggtt->vm.mutex);
1220 
1221 	if (flush)
1222 		wbinvd_on_all_cpus();
1223 }
1224 
1225 void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
1226 {
1227 	struct i915_ggtt *ggtt = &i915->ggtt;
1228 
1229 	ggtt_restore_mappings(ggtt);
1230 
1231 	if (INTEL_GEN(i915) >= 8)
1232 		setup_private_pat(ggtt->vm.gt->uncore);
1233 }
1234 
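/*
 * Build the sg layout for a rotated view: walk the source tile grid column
 * by column, reading each column bottom-to-top (src_idx starts at the last
 * row and steps back by one stride per page), and emit the DMA addresses as
 * consecutive entries of the new sg table.
 */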
1235 static struct scatterlist *
1236 rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1237 	     unsigned int width, unsigned int height,
1238 	     unsigned int stride,
1239 	     struct sg_table *st, struct scatterlist *sg)
1240 {
1241 	unsigned int column, row;
1242 	unsigned int src_idx;
1243 
1244 	for (column = 0; column < width; column++) {
1245 		src_idx = stride * (height - 1) + column + offset;
1246 		for (row = 0; row < height; row++) {
1247 			st->nents++;
1248 			/*
1249 			 * We don't need the pages, but need to initialize
1250 			 * the entries so the sg list can be happily traversed.
1251 			 * All we need are the DMA addresses.
1252 			 */
1253 			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
1254 			sg_dma_address(sg) =
1255 				i915_gem_object_get_dma_address(obj, src_idx);
1256 			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
1257 			sg = sg_next(sg);
1258 			src_idx -= stride;
1259 		}
1260 	}
1261 
1262 	return sg;
1263 }
1264 
1265 static noinline struct sg_table *
1266 intel_rotate_pages(struct intel_rotation_info *rot_info,
1267 		   struct drm_i915_gem_object *obj)
1268 {
1269 	unsigned int size = intel_rotation_info_size(rot_info);
1270 	struct sg_table *st;
1271 	struct scatterlist *sg;
1272 	int ret = -ENOMEM;
1273 	int i;
1274 
1275 	/* Allocate target SG list. */
1276 	st = kmalloc(sizeof(*st), GFP_KERNEL);
1277 	if (!st)
1278 		goto err_st_alloc;
1279 
1280 	ret = sg_alloc_table(st, size, GFP_KERNEL);
1281 	if (ret)
1282 		goto err_sg_alloc;
1283 
1284 	st->nents = 0;
1285 	sg = st->sgl;
1286 
1287 	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
1288 		sg = rotate_pages(obj, rot_info->plane[i].offset,
1289 				  rot_info->plane[i].width, rot_info->plane[i].height,
1290 				  rot_info->plane[i].stride, st, sg);
1291 	}
1292 
1293 	return st;
1294 
1295 err_sg_alloc:
1296 	kfree(st);
1297 err_st_alloc:
1298 
1299 	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1300 			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
1301 
1302 	return ERR_PTR(ret);
1303 }
1304 
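/*
 * Build the sg layout for a remapped view: for each row copy 'width' pages
 * worth of DMA addresses, coalescing physically contiguous runs into single
 * sg entries, then skip 'stride - width' pages to reach the start of the
 * next row in the source object.
 */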
1305 static struct scatterlist *
1306 remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1307 	    unsigned int width, unsigned int height,
1308 	    unsigned int stride,
1309 	    struct sg_table *st, struct scatterlist *sg)
1310 {
1311 	unsigned int row;
1312 
1313 	for (row = 0; row < height; row++) {
1314 		unsigned int left = width * I915_GTT_PAGE_SIZE;
1315 
1316 		while (left) {
1317 			dma_addr_t addr;
1318 			unsigned int length;
1319 
1320 			/*
1321 			 * We don't need the pages, but need to initialize
1322 			 * the entries so the sg list can be happily traversed.
1323 			 * All we need are the DMA addresses.
1324 			 */
1325 
1326 			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
1327 
1328 			length = min(left, length);
1329 
1330 			st->nents++;
1331 
1332 			sg_set_page(sg, NULL, length, 0);
1333 			sg_dma_address(sg) = addr;
1334 			sg_dma_len(sg) = length;
1335 			sg = sg_next(sg);
1336 
1337 			offset += length / I915_GTT_PAGE_SIZE;
1338 			left -= length;
1339 		}
1340 
1341 		offset += stride - width;
1342 	}
1343 
1344 	return sg;
1345 }
1346 
1347 static noinline struct sg_table *
1348 intel_remap_pages(struct intel_remapped_info *rem_info,
1349 		  struct drm_i915_gem_object *obj)
1350 {
1351 	unsigned int size = intel_remapped_info_size(rem_info);
1352 	struct sg_table *st;
1353 	struct scatterlist *sg;
1354 	int ret = -ENOMEM;
1355 	int i;
1356 
1357 	/* Allocate target SG list. */
1358 	st = kmalloc(sizeof(*st), GFP_KERNEL);
1359 	if (!st)
1360 		goto err_st_alloc;
1361 
1362 	ret = sg_alloc_table(st, size, GFP_KERNEL);
1363 	if (ret)
1364 		goto err_sg_alloc;
1365 
1366 	st->nents = 0;
1367 	sg = st->sgl;
1368 
1369 	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
1370 		sg = remap_pages(obj, rem_info->plane[i].offset,
1371 				 rem_info->plane[i].width, rem_info->plane[i].height,
1372 				 rem_info->plane[i].stride, st, sg);
1373 	}
1374 
1375 	i915_sg_trim(st);
1376 
1377 	return st;
1378 
1379 err_sg_alloc:
1380 	kfree(st);
1381 err_st_alloc:
1382 
1383 	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1384 			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
1385 
1386 	return ERR_PTR(ret);
1387 }
1388 
1389 static noinline struct sg_table *
1390 intel_partial_pages(const struct i915_ggtt_view *view,
1391 		    struct drm_i915_gem_object *obj)
1392 {
1393 	struct sg_table *st;
1394 	struct scatterlist *sg, *iter;
1395 	unsigned int count = view->partial.size;
1396 	unsigned int offset;
1397 	int ret = -ENOMEM;
1398 
1399 	st = kmalloc(sizeof(*st), GFP_KERNEL);
1400 	if (!st)
1401 		goto err_st_alloc;
1402 
1403 	ret = sg_alloc_table(st, count, GFP_KERNEL);
1404 	if (ret)
1405 		goto err_sg_alloc;
1406 
1407 	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
1408 	GEM_BUG_ON(!iter);
1409 
1410 	sg = st->sgl;
1411 	st->nents = 0;
1412 	do {
1413 		unsigned int len;
1414 
1415 		len = min(iter->length - (offset << PAGE_SHIFT),
1416 			  count << PAGE_SHIFT);
1417 		sg_set_page(sg, NULL, len, 0);
1418 		sg_dma_address(sg) =
1419 			sg_dma_address(iter) + (offset << PAGE_SHIFT);
1420 		sg_dma_len(sg) = len;
1421 
1422 		st->nents++;
1423 		count -= len >> PAGE_SHIFT;
1424 		if (count == 0) {
1425 			sg_mark_end(sg);
1426 			i915_sg_trim(st); /* Drop any unused tail entries. */
1427 
1428 			return st;
1429 		}
1430 
1431 		sg = __sg_next(sg);
1432 		iter = __sg_next(iter);
1433 		offset = 0;
1434 	} while (1);
1435 
1436 err_sg_alloc:
1437 	kfree(st);
1438 err_st_alloc:
1439 	return ERR_PTR(ret);
1440 }
1441 
1442 static int
1443 i915_get_ggtt_vma_pages(struct i915_vma *vma)
1444 {
1445 	int ret;
1446 
1447 	/*
1448 	 * The vma->pages are only valid within the lifespan of the borrowed
1449 	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
1450 	 * must be the vma->pages. A simple rule is that vma->pages must only
1451 	 * be accessed when the obj->mm.pages are pinned.
1452 	 */
1453 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
1454 
1455 	switch (vma->ggtt_view.type) {
1456 	default:
1457 		GEM_BUG_ON(vma->ggtt_view.type);
1458 		/* fall through */
1459 	case I915_GGTT_VIEW_NORMAL:
1460 		vma->pages = vma->obj->mm.pages;
1461 		return 0;
1462 
1463 	case I915_GGTT_VIEW_ROTATED:
1464 		vma->pages =
1465 			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
1466 		break;
1467 
1468 	case I915_GGTT_VIEW_REMAPPED:
1469 		vma->pages =
1470 			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
1471 		break;
1472 
1473 	case I915_GGTT_VIEW_PARTIAL:
1474 		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
1475 		break;
1476 	}
1477 
1478 	ret = 0;
1479 	if (IS_ERR(vma->pages)) {
1480 		ret = PTR_ERR(vma->pages);
1481 		vma->pages = NULL;
1482 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
1483 			  vma->ggtt_view.type, ret);
1484 	}
1485 	return ret;
1486 }
1487