1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
27 
28 #include <linux/fault-inject.h>
29 #include <linux/log2.h>
30 #include <linux/random.h>
31 #include <linux/seq_file.h>
32 #include <linux/stop_machine.h>
33 
34 #include <asm/set_memory.h>
35 
36 #include <drm/drmP.h>
37 #include <drm/i915_drm.h>
38 
39 #include "i915_drv.h"
40 #include "i915_vgpu.h"
41 #include "i915_trace.h"
42 #include "intel_drv.h"
43 #include "intel_frontbuffer.h"
44 
45 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
46 
47 /**
48  * DOC: Global GTT views
49  *
50  * Background and previous state
51  *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
59  *
60  * One example of an alternative view is a stereo display driven by a single
61  * image. In this case we would have a framebuffer looking like this
62  * (2x2 pages):
63  *
64  *    12
65  *    34
66  *
67  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
68  * rendering. In contrast, fed to the display engine would be an alternative
69  * view which could look something like this:
70  *
71  *   1212
72  *   3434
73  *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
76  *
77  * Implementation and usage
78  *
79  * GGTT views are implemented using VMAs and are distinguished via enum
80  * i915_ggtt_view_type and struct i915_ggtt_view.
81  *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
85  * parameter encapsulating all metadata required to implement a view.
86  *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on the
 * normal GGTT view.
91  *
92  * Code wanting to add or use a new GGTT view needs to:
93  *
94  * 1. Add a new enum with a suitable name.
95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
96  * 3. Add support to i915_get_vma_pages().
97  *
98  * New views are required to build a scatter-gather table from within the
99  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of a VMA.
101  *
 * The core API is designed to have copy semantics, which means that the
 * passed in struct i915_ggtt_view does not need to be persistent (left
 * around after calling the core API functions).
105  *
106  */
107 
108 static int
109 i915_get_ggtt_vma_pages(struct i915_vma *vma);
110 
111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
112 {
	/* Note that as an uncached mmio write, this should flush the
	 * write-combining buffer (WCB) of writes into the GGTT before it
	 * triggers the invalidate.
115 	 */
116 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
117 }
118 
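/* In addition to the plain GGTT invalidation, also request a TLB
 * invalidation on behalf of the GuC by writing GEN8_GTCR.
 */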
119 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
120 {
121 	gen6_ggtt_invalidate(dev_priv);
122 	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
123 }
124 
125 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
126 {
127 	intel_gtt_chipset_flush();
128 }
129 
130 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
131 {
132 	i915->ggtt.invalidate(i915);
133 }
134 
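/* Sanitize the requested enable_ppgtt value against the hardware (and vGPU)
 * capabilities. Returns the level of ppgtt to use: 0 (disabled), 1 (aliasing),
 * 2 (full 32b) or 3 (full 48b).
 */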
135 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
137 {
138 	bool has_full_ppgtt;
139 	bool has_full_48bit_ppgtt;
140 
141 	if (!dev_priv->info.has_aliasing_ppgtt)
142 		return 0;
143 
144 	has_full_ppgtt = dev_priv->info.has_full_ppgtt;
145 	has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
146 
147 	if (intel_vgpu_active(dev_priv)) {
148 		/* GVT-g has no support for 32bit ppgtt */
149 		has_full_ppgtt = false;
150 		has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
151 	}
152 
153 	/*
154 	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
155 	 * execlists, the sole mechanism available to submit work.
156 	 */
157 	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
158 		return 0;
159 
160 	if (enable_ppgtt == 1)
161 		return 1;
162 
163 	if (enable_ppgtt == 2 && has_full_ppgtt)
164 		return 2;
165 
166 	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
167 		return 3;
168 
169 	/* Disable ppgtt on SNB if VT-d is on. */
170 	if (IS_GEN6(dev_priv) && intel_vtd_active()) {
171 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
172 		return 0;
173 	}
174 
	/* Early (pre-B3 stepping) VLV doesn't have working PPGTT */
176 	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
177 		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
178 		return 0;
179 	}
180 
181 	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
182 		if (has_full_48bit_ppgtt)
183 			return 3;
184 
185 		if (has_full_ppgtt)
186 			return 2;
187 	}
188 
189 	return 1;
190 }
191 
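/* Bind a vma into a ppgtt: allocate the page-table backing for the VA range
 * if this is the first (local) binding, then write PTEs for the backing pages.
 */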
192 static int ppgtt_bind_vma(struct i915_vma *vma,
193 			  enum i915_cache_level cache_level,
194 			  u32 unused)
195 {
196 	u32 pte_flags;
197 	int ret;
198 
199 	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
200 		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
201 						 vma->size);
202 		if (ret)
203 			return ret;
204 	}
205 
206 	/* Currently applicable only to VLV */
207 	pte_flags = 0;
208 	if (vma->obj->gt_ro)
209 		pte_flags |= PTE_READ_ONLY;
210 
211 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
212 
213 	return 0;
214 }
215 
216 static void ppgtt_unbind_vma(struct i915_vma *vma)
217 {
218 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
219 }
220 
221 static int ppgtt_set_pages(struct i915_vma *vma)
222 {
223 	GEM_BUG_ON(vma->pages);
224 
225 	vma->pages = vma->obj->mm.pages;
226 
227 	vma->page_sizes = vma->obj->mm.page_sizes;
228 
229 	return 0;
230 }
231 
232 static void clear_pages(struct i915_vma *vma)
233 {
234 	GEM_BUG_ON(!vma->pages);
235 
236 	if (vma->pages != vma->obj->mm.pages) {
237 		sg_free_table(vma->pages);
238 		kfree(vma->pages);
239 	}
240 	vma->pages = NULL;
241 
242 	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
243 }
244 
245 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
246 				  enum i915_cache_level level)
247 {
248 	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
249 	pte |= addr;
250 
251 	switch (level) {
252 	case I915_CACHE_NONE:
253 		pte |= PPAT_UNCACHED;
254 		break;
255 	case I915_CACHE_WT:
256 		pte |= PPAT_DISPLAY_ELLC;
257 		break;
258 	default:
259 		pte |= PPAT_CACHED;
260 		break;
261 	}
262 
263 	return pte;
264 }
265 
266 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
267 				  const enum i915_cache_level level)
268 {
269 	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
270 	pde |= addr;
271 	if (level != I915_CACHE_NONE)
272 		pde |= PPAT_CACHED_PDE;
273 	else
274 		pde |= PPAT_UNCACHED;
275 	return pde;
276 }
277 
278 #define gen8_pdpe_encode gen8_pde_encode
279 #define gen8_pml4e_encode gen8_pde_encode
280 
281 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
282 				 enum i915_cache_level level,
283 				 u32 unused)
284 {
285 	gen6_pte_t pte = GEN6_PTE_VALID;
286 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
287 
288 	switch (level) {
289 	case I915_CACHE_L3_LLC:
290 	case I915_CACHE_LLC:
291 		pte |= GEN6_PTE_CACHE_LLC;
292 		break;
293 	case I915_CACHE_NONE:
294 		pte |= GEN6_PTE_UNCACHED;
295 		break;
296 	default:
297 		MISSING_CASE(level);
298 	}
299 
300 	return pte;
301 }
302 
303 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
304 				 enum i915_cache_level level,
305 				 u32 unused)
306 {
307 	gen6_pte_t pte = GEN6_PTE_VALID;
308 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
309 
310 	switch (level) {
311 	case I915_CACHE_L3_LLC:
312 		pte |= GEN7_PTE_CACHE_L3_LLC;
313 		break;
314 	case I915_CACHE_LLC:
315 		pte |= GEN6_PTE_CACHE_LLC;
316 		break;
317 	case I915_CACHE_NONE:
318 		pte |= GEN6_PTE_UNCACHED;
319 		break;
320 	default:
321 		MISSING_CASE(level);
322 	}
323 
324 	return pte;
325 }
326 
327 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
328 				 enum i915_cache_level level,
329 				 u32 flags)
330 {
331 	gen6_pte_t pte = GEN6_PTE_VALID;
332 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
333 
334 	if (!(flags & PTE_READ_ONLY))
335 		pte |= BYT_PTE_WRITEABLE;
336 
337 	if (level != I915_CACHE_NONE)
338 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
339 
340 	return pte;
341 }
342 
343 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
344 				 enum i915_cache_level level,
345 				 u32 unused)
346 {
347 	gen6_pte_t pte = GEN6_PTE_VALID;
348 	pte |= HSW_PTE_ADDR_ENCODE(addr);
349 
350 	if (level != I915_CACHE_NONE)
351 		pte |= HSW_WB_LLC_AGE3;
352 
353 	return pte;
354 }
355 
356 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
357 				  enum i915_cache_level level,
358 				  u32 unused)
359 {
360 	gen6_pte_t pte = GEN6_PTE_VALID;
361 	pte |= HSW_PTE_ADDR_ENCODE(addr);
362 
363 	switch (level) {
364 	case I915_CACHE_NONE:
365 		break;
366 	case I915_CACHE_WT:
367 		pte |= HSW_WT_ELLC_LLC_AGE3;
368 		break;
369 	default:
370 		pte |= HSW_WB_ELLC_LLC_AGE3;
371 		break;
372 	}
373 
374 	return pte;
375 }
376 
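/* Allocate a single page for use in a page table. Prefer the vm's local free
 * list and, when plain WB pages suffice (!pt_kmap_wc), simply allocate a
 * fresh page; otherwise consult the global WC stash and finally batch fresh
 * allocations through set_pages_array_wc() to amortize the attribute change.
 */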
377 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
378 {
379 	struct pagevec *pvec = &vm->free_pages;
380 	struct pagevec stash;
381 
382 	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
383 		i915_gem_shrink_all(vm->i915);
384 
385 	if (likely(pvec->nr))
386 		return pvec->pages[--pvec->nr];
387 
388 	if (!vm->pt_kmap_wc)
389 		return alloc_page(gfp);
390 
391 	/* A placeholder for a specific mutex to guard the WC stash */
392 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
393 
394 	/* Look in our global stash of WC pages... */
395 	pvec = &vm->i915->mm.wc_stash;
396 	if (likely(pvec->nr))
397 		return pvec->pages[--pvec->nr];
398 
399 	/*
	 * Otherwise batch allocate pages to amortize the cost of set_pages_wc.
401 	 *
402 	 * We have to be careful as page allocation may trigger the shrinker
403 	 * (via direct reclaim) which will fill up the WC stash underneath us.
404 	 * So we add our WB pages into a temporary pvec on the stack and merge
405 	 * them into the WC stash after all the allocations are complete.
406 	 */
407 	pagevec_init(&stash);
408 	do {
409 		struct page *page;
410 
411 		page = alloc_page(gfp);
412 		if (unlikely(!page))
413 			break;
414 
415 		stash.pages[stash.nr++] = page;
416 	} while (stash.nr < pagevec_space(pvec));
417 
418 	if (stash.nr) {
419 		int nr = min_t(int, stash.nr, pagevec_space(pvec));
420 		struct page **pages = stash.pages + stash.nr - nr;
421 
422 		if (nr && !set_pages_array_wc(pages, nr)) {
423 			memcpy(pvec->pages + pvec->nr,
424 			       pages, sizeof(pages[0]) * nr);
425 			pvec->nr += nr;
426 			stash.nr -= nr;
427 		}
428 
429 		pagevec_release(&stash);
430 	}
431 
432 	return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL;
433 }
434 
435 static void vm_free_pages_release(struct i915_address_space *vm,
436 				  bool immediate)
437 {
438 	struct pagevec *pvec = &vm->free_pages;
439 
440 	GEM_BUG_ON(!pagevec_count(pvec));
441 
442 	if (vm->pt_kmap_wc) {
443 		struct pagevec *stash = &vm->i915->mm.wc_stash;
444 
		/* When we use WC, first fill up the global stash and then,
		 * only if it is full, immediately free the overflow.
447 		 */
448 
449 		lockdep_assert_held(&vm->i915->drm.struct_mutex);
450 		if (pagevec_space(stash)) {
451 			do {
452 				stash->pages[stash->nr++] =
453 					pvec->pages[--pvec->nr];
454 				if (!pvec->nr)
455 					return;
456 			} while (pagevec_space(stash));
457 
458 			/* As we have made some room in the VM's free_pages,
459 			 * we can wait for it to fill again. Unless we are
460 			 * inside i915_address_space_fini() and must
461 			 * immediately release the pages!
462 			 */
463 			if (!immediate)
464 				return;
465 		}
466 
467 		set_pages_array_wb(pvec->pages, pvec->nr);
468 	}
469 
470 	__pagevec_release(pvec);
471 }
472 
473 static void vm_free_page(struct i915_address_space *vm, struct page *page)
474 {
475 	/*
476 	 * On !llc, we need to change the pages back to WB. We only do so
477 	 * in bulk, so we rarely need to change the page attributes here,
478 	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
479 	 * To make detection of the possible sleep more likely, use an
480 	 * unconditional might_sleep() for everybody.
481 	 */
482 	might_sleep();
483 	if (!pagevec_add(&vm->free_pages, page))
484 		vm_free_pages_release(vm, false);
485 }
486 
487 static int __setup_page_dma(struct i915_address_space *vm,
488 			    struct i915_page_dma *p,
489 			    gfp_t gfp)
490 {
491 	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
492 	if (unlikely(!p->page))
493 		return -ENOMEM;
494 
495 	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
496 				PCI_DMA_BIDIRECTIONAL);
497 	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
498 		vm_free_page(vm, p->page);
499 		return -ENOMEM;
500 	}
501 
502 	return 0;
503 }
504 
505 static int setup_page_dma(struct i915_address_space *vm,
506 			  struct i915_page_dma *p)
507 {
508 	return __setup_page_dma(vm, p, I915_GFP_DMA);
509 }
510 
511 static void cleanup_page_dma(struct i915_address_space *vm,
512 			     struct i915_page_dma *p)
513 {
514 	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
515 	vm_free_page(vm, p->page);
516 }
517 
518 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
519 
520 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
521 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
522 #define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
523 #define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
524 
525 static void fill_page_dma(struct i915_address_space *vm,
526 			  struct i915_page_dma *p,
527 			  const u64 val)
528 {
529 	u64 * const vaddr = kmap_atomic(p->page);
530 
531 	memset64(vaddr, val, PAGE_SIZE / sizeof(val));
532 
533 	kunmap_atomic(vaddr);
534 }
535 
536 static void fill_page_dma_32(struct i915_address_space *vm,
537 			     struct i915_page_dma *p,
538 			     const u32 v)
539 {
540 	fill_page_dma(vm, p, (u64)v << 32 | v);
541 }
542 
543 static int
544 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
545 {
546 	unsigned long size;
547 
548 	/*
549 	 * In order to utilize 64K pages for an object with a size < 2M, we will
550 	 * need to support a 64K scratch page, given that every 16th entry for a
551 	 * page-table operating in 64K mode must point to a properly aligned 64K
552 	 * region, including any PTEs which happen to point to scratch.
553 	 *
554 	 * This is only relevant for the 48b PPGTT where we support
555 	 * huge-gtt-pages, see also i915_vma_insert().
556 	 *
	 * TODO: we should really consider write-protecting the scratch-page
	 * and sharing it between ppgtts.
559 	 */
560 	size = I915_GTT_PAGE_SIZE_4K;
561 	if (i915_vm_is_48bit(vm) &&
562 	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
563 		size = I915_GTT_PAGE_SIZE_64K;
564 		gfp |= __GFP_NOWARN;
565 	}
566 	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
567 
568 	do {
569 		int order = get_order(size);
570 		struct page *page;
571 		dma_addr_t addr;
572 
573 		page = alloc_pages(gfp, order);
574 		if (unlikely(!page))
575 			goto skip;
576 
577 		addr = dma_map_page(vm->dma, page, 0, size,
578 				    PCI_DMA_BIDIRECTIONAL);
579 		if (unlikely(dma_mapping_error(vm->dma, addr)))
580 			goto free_page;
581 
582 		if (unlikely(!IS_ALIGNED(addr, size)))
583 			goto unmap_page;
584 
585 		vm->scratch_page.page = page;
586 		vm->scratch_page.daddr = addr;
587 		vm->scratch_page.order = order;
588 		return 0;
589 
590 unmap_page:
591 		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
592 free_page:
593 		__free_pages(page, order);
594 skip:
595 		if (size == I915_GTT_PAGE_SIZE_4K)
596 			return -ENOMEM;
597 
598 		size = I915_GTT_PAGE_SIZE_4K;
599 		gfp &= ~__GFP_NOWARN;
600 	} while (1);
601 }
602 
603 static void cleanup_scratch_page(struct i915_address_space *vm)
604 {
605 	struct i915_page_dma *p = &vm->scratch_page;
606 
607 	dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
608 		       PCI_DMA_BIDIRECTIONAL);
609 	__free_pages(p->page, p->order);
610 }
611 
612 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
613 {
614 	struct i915_page_table *pt;
615 
616 	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
617 	if (unlikely(!pt))
618 		return ERR_PTR(-ENOMEM);
619 
620 	if (unlikely(setup_px(vm, pt))) {
621 		kfree(pt);
622 		return ERR_PTR(-ENOMEM);
623 	}
624 
625 	pt->used_ptes = 0;
626 	return pt;
627 }
628 
629 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
630 {
631 	cleanup_px(vm, pt);
632 	kfree(pt);
633 }
634 
635 static void gen8_initialize_pt(struct i915_address_space *vm,
636 			       struct i915_page_table *pt)
637 {
638 	fill_px(vm, pt,
639 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
640 }
641 
642 static void gen6_initialize_pt(struct i915_address_space *vm,
643 			       struct i915_page_table *pt)
644 {
645 	fill32_px(vm, pt,
646 		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
647 }
648 
649 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
650 {
651 	struct i915_page_directory *pd;
652 
653 	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
654 	if (unlikely(!pd))
655 		return ERR_PTR(-ENOMEM);
656 
657 	if (unlikely(setup_px(vm, pd))) {
658 		kfree(pd);
659 		return ERR_PTR(-ENOMEM);
660 	}
661 
662 	pd->used_pdes = 0;
663 	return pd;
664 }
665 
666 static void free_pd(struct i915_address_space *vm,
667 		    struct i915_page_directory *pd)
668 {
669 	cleanup_px(vm, pd);
670 	kfree(pd);
671 }
672 
673 static void gen8_initialize_pd(struct i915_address_space *vm,
674 			       struct i915_page_directory *pd)
675 {
676 	unsigned int i;
677 
678 	fill_px(vm, pd,
679 		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
680 	for (i = 0; i < I915_PDES; i++)
681 		pd->page_table[i] = vm->scratch_pt;
682 }
683 
684 static int __pdp_init(struct i915_address_space *vm,
685 		      struct i915_page_directory_pointer *pdp)
686 {
687 	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
688 	unsigned int i;
689 
690 	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
691 					    GFP_KERNEL | __GFP_NOWARN);
692 	if (unlikely(!pdp->page_directory))
693 		return -ENOMEM;
694 
695 	for (i = 0; i < pdpes; i++)
696 		pdp->page_directory[i] = vm->scratch_pd;
697 
698 	return 0;
699 }
700 
701 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
702 {
703 	kfree(pdp->page_directory);
704 	pdp->page_directory = NULL;
705 }
706 
707 static inline bool use_4lvl(const struct i915_address_space *vm)
708 {
709 	return i915_vm_is_48bit(vm);
710 }
711 
712 static struct i915_page_directory_pointer *
713 alloc_pdp(struct i915_address_space *vm)
714 {
715 	struct i915_page_directory_pointer *pdp;
716 	int ret = -ENOMEM;
717 
718 	WARN_ON(!use_4lvl(vm));
719 
720 	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
721 	if (!pdp)
722 		return ERR_PTR(-ENOMEM);
723 
724 	ret = __pdp_init(vm, pdp);
725 	if (ret)
726 		goto fail_bitmap;
727 
728 	ret = setup_px(vm, pdp);
729 	if (ret)
730 		goto fail_page_m;
731 
732 	return pdp;
733 
734 fail_page_m:
735 	__pdp_fini(pdp);
736 fail_bitmap:
737 	kfree(pdp);
738 
739 	return ERR_PTR(ret);
740 }
741 
742 static void free_pdp(struct i915_address_space *vm,
743 		     struct i915_page_directory_pointer *pdp)
744 {
745 	__pdp_fini(pdp);
746 
747 	if (!use_4lvl(vm))
748 		return;
749 
750 	cleanup_px(vm, pdp);
751 	kfree(pdp);
752 }
753 
754 static void gen8_initialize_pdp(struct i915_address_space *vm,
755 				struct i915_page_directory_pointer *pdp)
756 {
757 	gen8_ppgtt_pdpe_t scratch_pdpe;
758 
759 	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
760 
761 	fill_px(vm, pdp, scratch_pdpe);
762 }
763 
764 static void gen8_initialize_pml4(struct i915_address_space *vm,
765 				 struct i915_pml4 *pml4)
766 {
767 	unsigned int i;
768 
769 	fill_px(vm, pml4,
770 		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
771 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
772 		pml4->pdps[i] = vm->scratch_pdp;
773 }
774 
775 /* Broadwell Page Directory Pointer Descriptors */
776 static int gen8_write_pdp(struct drm_i915_gem_request *req,
777 			  unsigned entry,
778 			  dma_addr_t addr)
779 {
780 	struct intel_engine_cs *engine = req->engine;
781 	u32 *cs;
782 
783 	BUG_ON(entry >= 4);
784 
785 	cs = intel_ring_begin(req, 6);
786 	if (IS_ERR(cs))
787 		return PTR_ERR(cs);
788 
789 	*cs++ = MI_LOAD_REGISTER_IMM(1);
790 	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
791 	*cs++ = upper_32_bits(addr);
792 	*cs++ = MI_LOAD_REGISTER_IMM(1);
793 	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
794 	*cs++ = lower_32_bits(addr);
795 	intel_ring_advance(req, cs);
796 
797 	return 0;
798 }
799 
800 static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
801 			       struct drm_i915_gem_request *req)
802 {
803 	int i, ret;
804 
805 	for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
806 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
807 
808 		ret = gen8_write_pdp(req, i, pd_daddr);
809 		if (ret)
810 			return ret;
811 	}
812 
813 	return 0;
814 }
815 
816 static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
817 			       struct drm_i915_gem_request *req)
818 {
819 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
820 }
821 
822 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
823  * the page table structures, we mark them dirty so that
824  * context switching/execlist queuing code takes extra steps
825  * to ensure that tlbs are flushed.
826  */
827 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
828 {
829 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
830 }
831 
832 /* Removes entries from a single page table, releasing it if it's empty.
833  * Caller can use the return value to update higher-level entries.
834  */
835 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
836 				struct i915_page_table *pt,
837 				u64 start, u64 length)
838 {
839 	unsigned int num_entries = gen8_pte_count(start, length);
840 	unsigned int pte = gen8_pte_index(start);
841 	unsigned int pte_end = pte + num_entries;
842 	const gen8_pte_t scratch_pte =
843 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
844 	gen8_pte_t *vaddr;
845 
846 	GEM_BUG_ON(num_entries > pt->used_ptes);
847 
848 	pt->used_ptes -= num_entries;
849 	if (!pt->used_ptes)
850 		return true;
851 
852 	vaddr = kmap_atomic_px(pt);
853 	while (pte < pte_end)
854 		vaddr[pte++] = scratch_pte;
855 	kunmap_atomic(vaddr);
856 
857 	return false;
858 }
859 
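/* Point the @pde'th entry of @pd at @pt, updating both the CPU-side pointer
 * and the GPU-visible page-directory entry.
 */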
860 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
861 			       struct i915_page_directory *pd,
862 			       struct i915_page_table *pt,
863 			       unsigned int pde)
864 {
865 	gen8_pde_t *vaddr;
866 
867 	pd->page_table[pde] = pt;
868 
869 	vaddr = kmap_atomic_px(pd);
870 	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
871 	kunmap_atomic(vaddr);
872 }
873 
874 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
875 				struct i915_page_directory *pd,
876 				u64 start, u64 length)
877 {
878 	struct i915_page_table *pt;
879 	u32 pde;
880 
881 	gen8_for_each_pde(pt, pd, start, length, pde) {
882 		GEM_BUG_ON(pt == vm->scratch_pt);
883 
884 		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
885 			continue;
886 
887 		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
888 		GEM_BUG_ON(!pd->used_pdes);
889 		pd->used_pdes--;
890 
891 		free_pt(vm, pt);
892 	}
893 
894 	return !pd->used_pdes;
895 }
896 
897 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
898 				struct i915_page_directory_pointer *pdp,
899 				struct i915_page_directory *pd,
900 				unsigned int pdpe)
901 {
902 	gen8_ppgtt_pdpe_t *vaddr;
903 
904 	pdp->page_directory[pdpe] = pd;
905 	if (!use_4lvl(vm))
906 		return;
907 
908 	vaddr = kmap_atomic_px(pdp);
909 	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
910 	kunmap_atomic(vaddr);
911 }
912 
913 /* Removes entries from a single page dir pointer, releasing it if it's empty.
914  * Caller can use the return value to update higher-level entries
915  */
916 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
917 				 struct i915_page_directory_pointer *pdp,
918 				 u64 start, u64 length)
919 {
920 	struct i915_page_directory *pd;
921 	unsigned int pdpe;
922 
923 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
924 		GEM_BUG_ON(pd == vm->scratch_pd);
925 
926 		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
927 			continue;
928 
929 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
930 		GEM_BUG_ON(!pdp->used_pdpes);
931 		pdp->used_pdpes--;
932 
933 		free_pd(vm, pd);
934 	}
935 
936 	return !pdp->used_pdpes;
937 }
938 
939 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
940 				  u64 start, u64 length)
941 {
942 	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
943 }
944 
945 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
946 				 struct i915_page_directory_pointer *pdp,
947 				 unsigned int pml4e)
948 {
949 	gen8_ppgtt_pml4e_t *vaddr;
950 
951 	pml4->pdps[pml4e] = pdp;
952 
953 	vaddr = kmap_atomic_px(pml4);
954 	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
955 	kunmap_atomic(vaddr);
956 }
957 
958 /* Removes entries from a single pml4.
959  * This is the top-level structure in 4-level page tables used on gen8+.
960  * Empty entries are always scratch pml4e.
961  */
962 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
963 				  u64 start, u64 length)
964 {
965 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
966 	struct i915_pml4 *pml4 = &ppgtt->pml4;
967 	struct i915_page_directory_pointer *pdp;
968 	unsigned int pml4e;
969 
970 	GEM_BUG_ON(!use_4lvl(vm));
971 
972 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
973 		GEM_BUG_ON(pdp == vm->scratch_pdp);
974 
975 		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
976 			continue;
977 
978 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
979 
980 		free_pdp(vm, pdp);
981 	}
982 }
983 
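/* A small cursor over a vma's backing store: the current scatterlist entry
 * together with the [dma, max) range of DMA addresses it covers.
 */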
984 static inline struct sgt_dma {
985 	struct scatterlist *sg;
986 	dma_addr_t dma, max;
987 } sgt_dma(struct i915_vma *vma) {
988 	struct scatterlist *sg = vma->pages->sgl;
989 	dma_addr_t addr = sg_dma_address(sg);
990 	return (struct sgt_dma) { sg, addr, addr + sg->length };
991 }
992 
993 struct gen8_insert_pte {
994 	u16 pml4e;
995 	u16 pdpe;
996 	u16 pde;
997 	u16 pte;
998 };
999 
1000 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
1001 {
1002 	return (struct gen8_insert_pte) {
1003 		 gen8_pml4e_index(start),
1004 		 gen8_pdpe_index(start),
1005 		 gen8_pde_index(start),
1006 		 gen8_pte_index(start),
1007 	};
1008 }
1009 
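/* Write PTEs for the sg list starting at @idx within @pdp. Returns true if
 * the walk ran off the end of @pdp (the 4lvl caller should continue with the
 * next pml4e), or false once the sg list has been fully consumed.
 */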
1010 static __always_inline bool
1011 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
1012 			      struct i915_page_directory_pointer *pdp,
1013 			      struct sgt_dma *iter,
1014 			      struct gen8_insert_pte *idx,
1015 			      enum i915_cache_level cache_level)
1016 {
1017 	struct i915_page_directory *pd;
1018 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1019 	gen8_pte_t *vaddr;
1020 	bool ret;
1021 
1022 	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1023 	pd = pdp->page_directory[idx->pdpe];
1024 	vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1025 	do {
1026 		vaddr[idx->pte] = pte_encode | iter->dma;
1027 
1028 		iter->dma += PAGE_SIZE;
1029 		if (iter->dma >= iter->max) {
1030 			iter->sg = __sg_next(iter->sg);
1031 			if (!iter->sg) {
1032 				ret = false;
1033 				break;
1034 			}
1035 
1036 			iter->dma = sg_dma_address(iter->sg);
1037 			iter->max = iter->dma + iter->sg->length;
1038 		}
1039 
1040 		if (++idx->pte == GEN8_PTES) {
1041 			idx->pte = 0;
1042 
1043 			if (++idx->pde == I915_PDES) {
1044 				idx->pde = 0;
1045 
1046 				/* Limited by sg length for 3lvl */
1047 				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1048 					idx->pdpe = 0;
1049 					ret = true;
1050 					break;
1051 				}
1052 
1053 				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1054 				pd = pdp->page_directory[idx->pdpe];
1055 			}
1056 
1057 			kunmap_atomic(vaddr);
1058 			vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1059 		}
1060 	} while (1);
1061 	kunmap_atomic(vaddr);
1062 
1063 	return ret;
1064 }
1065 
1066 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1067 				   struct i915_vma *vma,
1068 				   enum i915_cache_level cache_level,
1069 				   u32 unused)
1070 {
1071 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1072 	struct sgt_dma iter = sgt_dma(vma);
1073 	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1074 
1075 	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1076 				      cache_level);
1077 
1078 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1079 }
1080 
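/* Insert entries using the largest page size that the backing store and GTT
 * offset allow: 2M PDEs where possible, otherwise 4K PTEs, retroactively
 * marking a page table as 64K (GEN8_PDE_IPS_64K) when it is safe to do so.
 */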
1081 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1082 					   struct i915_page_directory_pointer **pdps,
1083 					   struct sgt_dma *iter,
1084 					   enum i915_cache_level cache_level)
1085 {
1086 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1087 	u64 start = vma->node.start;
1088 	dma_addr_t rem = iter->sg->length;
1089 
1090 	do {
1091 		struct gen8_insert_pte idx = gen8_insert_pte(start);
1092 		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1093 		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1094 		unsigned int page_size;
1095 		bool maybe_64K = false;
1096 		gen8_pte_t encode = pte_encode;
1097 		gen8_pte_t *vaddr;
1098 		u16 index, max;
1099 
1100 		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1101 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1102 		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1103 			index = idx.pde;
1104 			max = I915_PDES;
1105 			page_size = I915_GTT_PAGE_SIZE_2M;
1106 
1107 			encode |= GEN8_PDE_PS_2M;
1108 
1109 			vaddr = kmap_atomic_px(pd);
1110 		} else {
1111 			struct i915_page_table *pt = pd->page_table[idx.pde];
1112 
1113 			index = idx.pte;
1114 			max = GEN8_PTES;
1115 			page_size = I915_GTT_PAGE_SIZE;
1116 
1117 			if (!index &&
1118 			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1119 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1120 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1121 			     rem >= (max - index) << PAGE_SHIFT))
1122 				maybe_64K = true;
1123 
1124 			vaddr = kmap_atomic_px(pt);
1125 		}
1126 
1127 		do {
1128 			GEM_BUG_ON(iter->sg->length < page_size);
1129 			vaddr[index++] = encode | iter->dma;
1130 
1131 			start += page_size;
1132 			iter->dma += page_size;
1133 			rem -= page_size;
1134 			if (iter->dma >= iter->max) {
1135 				iter->sg = __sg_next(iter->sg);
1136 				if (!iter->sg)
1137 					break;
1138 
1139 				rem = iter->sg->length;
1140 				iter->dma = sg_dma_address(iter->sg);
1141 				iter->max = iter->dma + rem;
1142 
1143 				if (maybe_64K && index < max &&
1144 				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1145 				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1146 				       rem >= (max - index) << PAGE_SHIFT)))
1147 					maybe_64K = false;
1148 
1149 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1150 					break;
1151 			}
1152 		} while (rem >= page_size && index < max);
1153 
1154 		kunmap_atomic(vaddr);
1155 
		/*
		 * Is it safe to mark the 2M block as 64K? Either we have
		 * filled the whole page-table with 64K entries, or we have
		 * filled part of it, reached the end of the sg table and
		 * have enough padding.
		 */
1162 		if (maybe_64K &&
1163 		    (index == max ||
1164 		     (i915_vm_has_scratch_64K(vma->vm) &&
1165 		      !iter->sg && IS_ALIGNED(vma->node.start +
1166 					      vma->node.size,
1167 					      I915_GTT_PAGE_SIZE_2M)))) {
1168 			vaddr = kmap_atomic_px(pd);
1169 			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1170 			kunmap_atomic(vaddr);
1171 			page_size = I915_GTT_PAGE_SIZE_64K;
1172 		}
1173 
1174 		vma->page_sizes.gtt |= page_size;
1175 	} while (iter->sg);
1176 }
1177 
1178 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1179 				   struct i915_vma *vma,
1180 				   enum i915_cache_level cache_level,
1181 				   u32 unused)
1182 {
1183 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1184 	struct sgt_dma iter = sgt_dma(vma);
1185 	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1186 
1187 	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1188 		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
1189 	} else {
1190 		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1191 
1192 		while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1193 						     &iter, &idx, cache_level))
1194 			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1195 
1196 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1197 	}
1198 }
1199 
1200 static void gen8_free_page_tables(struct i915_address_space *vm,
1201 				  struct i915_page_directory *pd)
1202 {
1203 	int i;
1204 
1205 	if (!px_page(pd))
1206 		return;
1207 
1208 	for (i = 0; i < I915_PDES; i++) {
1209 		if (pd->page_table[i] != vm->scratch_pt)
1210 			free_pt(vm, pd->page_table[i]);
1211 	}
1212 }
1213 
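/* Allocate and initialise the scratch page, page table, page directory (and,
 * for 48b ppgtt, page directory pointer) used to back unallocated ranges of
 * the address space.
 */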
1214 static int gen8_init_scratch(struct i915_address_space *vm)
1215 {
1216 	int ret;
1217 
1218 	ret = setup_scratch_page(vm, I915_GFP_DMA);
1219 	if (ret)
1220 		return ret;
1221 
1222 	vm->scratch_pt = alloc_pt(vm);
1223 	if (IS_ERR(vm->scratch_pt)) {
1224 		ret = PTR_ERR(vm->scratch_pt);
1225 		goto free_scratch_page;
1226 	}
1227 
1228 	vm->scratch_pd = alloc_pd(vm);
1229 	if (IS_ERR(vm->scratch_pd)) {
1230 		ret = PTR_ERR(vm->scratch_pd);
1231 		goto free_pt;
1232 	}
1233 
1234 	if (use_4lvl(vm)) {
1235 		vm->scratch_pdp = alloc_pdp(vm);
1236 		if (IS_ERR(vm->scratch_pdp)) {
1237 			ret = PTR_ERR(vm->scratch_pdp);
1238 			goto free_pd;
1239 		}
1240 	}
1241 
1242 	gen8_initialize_pt(vm, vm->scratch_pt);
1243 	gen8_initialize_pd(vm, vm->scratch_pd);
1244 	if (use_4lvl(vm))
1245 		gen8_initialize_pdp(vm, vm->scratch_pdp);
1246 
1247 	return 0;
1248 
1249 free_pd:
1250 	free_pd(vm, vm->scratch_pd);
1251 free_pt:
1252 	free_pt(vm, vm->scratch_pt);
1253 free_scratch_page:
1254 	cleanup_scratch_page(vm);
1255 
1256 	return ret;
1257 }
1258 
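/* When running as a GVT-g guest, notify the host about ppgtt creation or
 * destruction by writing the top-level page-table addresses and a g2v
 * message through the vgtif registers.
 */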
1259 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1260 {
1261 	struct i915_address_space *vm = &ppgtt->base;
1262 	struct drm_i915_private *dev_priv = vm->i915;
1263 	enum vgt_g2v_type msg;
1264 	int i;
1265 
1266 	if (use_4lvl(vm)) {
1267 		const u64 daddr = px_dma(&ppgtt->pml4);
1268 
1269 		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1270 		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1271 
1272 		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1273 				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1274 	} else {
1275 		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1276 			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1277 
1278 			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1279 			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1280 		}
1281 
1282 		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1283 				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1284 	}
1285 
1286 	I915_WRITE(vgtif_reg(g2v_notify), msg);
1287 
1288 	return 0;
1289 }
1290 
1291 static void gen8_free_scratch(struct i915_address_space *vm)
1292 {
1293 	if (use_4lvl(vm))
1294 		free_pdp(vm, vm->scratch_pdp);
1295 	free_pd(vm, vm->scratch_pd);
1296 	free_pt(vm, vm->scratch_pt);
1297 	cleanup_scratch_page(vm);
1298 }
1299 
1300 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1301 				    struct i915_page_directory_pointer *pdp)
1302 {
1303 	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1304 	int i;
1305 
1306 	for (i = 0; i < pdpes; i++) {
1307 		if (pdp->page_directory[i] == vm->scratch_pd)
1308 			continue;
1309 
1310 		gen8_free_page_tables(vm, pdp->page_directory[i]);
1311 		free_pd(vm, pdp->page_directory[i]);
1312 	}
1313 
1314 	free_pdp(vm, pdp);
1315 }
1316 
1317 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1318 {
1319 	int i;
1320 
1321 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1322 		if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
1323 			continue;
1324 
1325 		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
1326 	}
1327 
1328 	cleanup_px(&ppgtt->base, &ppgtt->pml4);
1329 }
1330 
1331 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1332 {
1333 	struct drm_i915_private *dev_priv = vm->i915;
1334 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1335 
1336 	if (intel_vgpu_active(dev_priv))
1337 		gen8_ppgtt_notify_vgt(ppgtt, false);
1338 
1339 	if (use_4lvl(vm))
1340 		gen8_ppgtt_cleanup_4lvl(ppgtt);
1341 	else
1342 		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
1343 
1344 	gen8_free_scratch(vm);
1345 }
1346 
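/* Allocate any missing page tables covering [start, start + length) in @pd,
 * replacing scratch entries; on failure the partially allocated range is
 * unwound back to scratch.
 */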
1347 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1348 			       struct i915_page_directory *pd,
1349 			       u64 start, u64 length)
1350 {
1351 	struct i915_page_table *pt;
1352 	u64 from = start;
1353 	unsigned int pde;
1354 
1355 	gen8_for_each_pde(pt, pd, start, length, pde) {
1356 		int count = gen8_pte_count(start, length);
1357 
1358 		if (pt == vm->scratch_pt) {
1359 			pd->used_pdes++;
1360 
1361 			pt = alloc_pt(vm);
1362 			if (IS_ERR(pt)) {
1363 				pd->used_pdes--;
1364 				goto unwind;
1365 			}
1366 
1367 			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1368 				gen8_initialize_pt(vm, pt);
1369 
1370 			gen8_ppgtt_set_pde(vm, pd, pt, pde);
1371 			GEM_BUG_ON(pd->used_pdes > I915_PDES);
1372 		}
1373 
1374 		pt->used_ptes += count;
1375 	}
1376 	return 0;
1377 
1378 unwind:
1379 	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1380 	return -ENOMEM;
1381 }
1382 
1383 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1384 				struct i915_page_directory_pointer *pdp,
1385 				u64 start, u64 length)
1386 {
1387 	struct i915_page_directory *pd;
1388 	u64 from = start;
1389 	unsigned int pdpe;
1390 	int ret;
1391 
1392 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1393 		if (pd == vm->scratch_pd) {
1394 			pdp->used_pdpes++;
1395 
1396 			pd = alloc_pd(vm);
1397 			if (IS_ERR(pd)) {
1398 				pdp->used_pdpes--;
1399 				goto unwind;
1400 			}
1401 
1402 			gen8_initialize_pd(vm, pd);
1403 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1404 			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1405 
1406 			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
1407 		}
1408 
1409 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1410 		if (unlikely(ret))
1411 			goto unwind_pd;
1412 	}
1413 
1414 	return 0;
1415 
1416 unwind_pd:
1417 	if (!pd->used_pdes) {
1418 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1419 		GEM_BUG_ON(!pdp->used_pdpes);
1420 		pdp->used_pdpes--;
1421 		free_pd(vm, pd);
1422 	}
1423 unwind:
1424 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1425 	return -ENOMEM;
1426 }
1427 
1428 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1429 				 u64 start, u64 length)
1430 {
1431 	return gen8_ppgtt_alloc_pdp(vm,
1432 				    &i915_vm_to_ppgtt(vm)->pdp, start, length);
1433 }
1434 
1435 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1436 				 u64 start, u64 length)
1437 {
1438 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1439 	struct i915_pml4 *pml4 = &ppgtt->pml4;
1440 	struct i915_page_directory_pointer *pdp;
1441 	u64 from = start;
1442 	u32 pml4e;
1443 	int ret;
1444 
1445 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1446 		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1447 			pdp = alloc_pdp(vm);
1448 			if (IS_ERR(pdp))
1449 				goto unwind;
1450 
1451 			gen8_initialize_pdp(vm, pdp);
1452 			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1453 		}
1454 
1455 		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1456 		if (unlikely(ret))
1457 			goto unwind_pdp;
1458 	}
1459 
1460 	return 0;
1461 
1462 unwind_pdp:
1463 	if (!pdp->used_pdpes) {
1464 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1465 		free_pdp(vm, pdp);
1466 	}
1467 unwind:
1468 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
1469 	return -ENOMEM;
1470 }
1471 
1472 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1473 			  struct i915_page_directory_pointer *pdp,
1474 			  u64 start, u64 length,
1475 			  gen8_pte_t scratch_pte,
1476 			  struct seq_file *m)
1477 {
1478 	struct i915_address_space *vm = &ppgtt->base;
1479 	struct i915_page_directory *pd;
1480 	u32 pdpe;
1481 
1482 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1483 		struct i915_page_table *pt;
1484 		u64 pd_len = length;
1485 		u64 pd_start = start;
1486 		u32 pde;
1487 
1488 		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
1489 			continue;
1490 
1491 		seq_printf(m, "\tPDPE #%d\n", pdpe);
1492 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1493 			u32 pte;
1494 			gen8_pte_t *pt_vaddr;
1495 
1496 			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
1497 				continue;
1498 
1499 			pt_vaddr = kmap_atomic_px(pt);
1500 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
1501 				u64 va = (pdpe << GEN8_PDPE_SHIFT |
1502 					  pde << GEN8_PDE_SHIFT |
1503 					  pte << GEN8_PTE_SHIFT);
1504 				int i;
1505 				bool found = false;
1506 
1507 				for (i = 0; i < 4; i++)
1508 					if (pt_vaddr[pte + i] != scratch_pte)
1509 						found = true;
1510 				if (!found)
1511 					continue;
1512 
1513 				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1514 				for (i = 0; i < 4; i++) {
1515 					if (pt_vaddr[pte + i] != scratch_pte)
1516 						seq_printf(m, " %llx", pt_vaddr[pte + i]);
1517 					else
1518 						seq_puts(m, "  SCRATCH ");
1519 				}
1520 				seq_puts(m, "\n");
1521 			}
1522 			kunmap_atomic(pt_vaddr);
1523 		}
1524 	}
1525 }
1526 
1527 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1528 {
1529 	struct i915_address_space *vm = &ppgtt->base;
1530 	const gen8_pte_t scratch_pte =
1531 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
1532 	u64 start = 0, length = ppgtt->base.total;
1533 
1534 	if (use_4lvl(vm)) {
1535 		u64 pml4e;
1536 		struct i915_pml4 *pml4 = &ppgtt->pml4;
1537 		struct i915_page_directory_pointer *pdp;
1538 
1539 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1540 			if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
1541 				continue;
1542 
1543 			seq_printf(m, "    PML4E #%llu\n", pml4e);
1544 			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1545 		}
1546 	} else {
1547 		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1548 	}
1549 }
1550 
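/* Under a vGPU the top-level pdp is populated in full up front and kept for
 * the lifetime of the ppgtt (note the extra used_pdpes reference below).
 */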
1551 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1552 {
1553 	struct i915_address_space *vm = &ppgtt->base;
1554 	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1555 	struct i915_page_directory *pd;
1556 	u64 start = 0, length = ppgtt->base.total;
1557 	u64 from = start;
1558 	unsigned int pdpe;
1559 
1560 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1561 		pd = alloc_pd(vm);
1562 		if (IS_ERR(pd))
1563 			goto unwind;
1564 
1565 		gen8_initialize_pd(vm, pd);
1566 		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1567 		pdp->used_pdpes++;
1568 	}
1569 
1570 	pdp->used_pdpes++; /* never remove */
1571 	return 0;
1572 
1573 unwind:
1574 	start -= from;
1575 	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1576 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1577 		free_pd(vm, pd);
1578 	}
1579 	pdp->used_pdpes = 0;
1580 	return -ENOMEM;
1581 }
1582 
/*
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
 * legacy 32b address space.
 */
1590 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1591 {
1592 	struct i915_address_space *vm = &ppgtt->base;
1593 	struct drm_i915_private *dev_priv = vm->i915;
1594 	int ret;
1595 
1596 	ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
1597 		1ULL << 48 :
1598 		1ULL << 32;
1599 
	/* There are only a few exceptions for gen >= 6: chv and bxt.
	 * And we are not sure about the latter, so play safe for now.
1602 	 */
1603 	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
1604 		ppgtt->base.pt_kmap_wc = true;
1605 
1606 	ret = gen8_init_scratch(&ppgtt->base);
1607 	if (ret) {
1608 		ppgtt->base.total = 0;
1609 		return ret;
1610 	}
1611 
1612 	if (use_4lvl(vm)) {
1613 		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
1614 		if (ret)
1615 			goto free_scratch;
1616 
1617 		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1618 
1619 		ppgtt->switch_mm = gen8_mm_switch_4lvl;
1620 		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1621 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
1622 		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
1623 	} else {
1624 		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
1625 		if (ret)
1626 			goto free_scratch;
1627 
1628 		if (intel_vgpu_active(dev_priv)) {
1629 			ret = gen8_preallocate_top_level_pdp(ppgtt);
1630 			if (ret) {
1631 				__pdp_fini(&ppgtt->pdp);
1632 				goto free_scratch;
1633 			}
1634 		}
1635 
1636 		ppgtt->switch_mm = gen8_mm_switch_3lvl;
1637 		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1638 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
1639 		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
1640 	}
1641 
1642 	if (intel_vgpu_active(dev_priv))
1643 		gen8_ppgtt_notify_vgt(ppgtt, true);
1644 
1645 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1646 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1647 	ppgtt->base.bind_vma = ppgtt_bind_vma;
1648 	ppgtt->base.set_pages = ppgtt_set_pages;
1649 	ppgtt->base.clear_pages = clear_pages;
1650 	ppgtt->debug_dump = gen8_dump_ppgtt;
1651 
1652 	return 0;
1653 
1654 free_scratch:
1655 	gen8_free_scratch(&ppgtt->base);
1656 	return ret;
1657 }
1658 
1659 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1660 {
1661 	struct i915_address_space *vm = &ppgtt->base;
1662 	struct i915_page_table *unused;
1663 	gen6_pte_t scratch_pte;
1664 	u32 pd_entry, pte, pde;
1665 	u32 start = 0, length = ppgtt->base.total;
1666 
1667 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1668 				     I915_CACHE_LLC, 0);
1669 
1670 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1671 		u32 expected;
1672 		gen6_pte_t *pt_vaddr;
1673 		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1674 		pd_entry = readl(ppgtt->pd_addr + pde);
1675 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1676 
1677 		if (pd_entry != expected)
1678 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1679 				   pde,
1680 				   pd_entry,
1681 				   expected);
1682 		seq_printf(m, "\tPDE: %x\n", pd_entry);
1683 
1684 		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
1685 
		for (pte = 0; pte < GEN6_PTES; pte += 4) {
1687 			unsigned long va =
1688 				(pde * PAGE_SIZE * GEN6_PTES) +
1689 				(pte * PAGE_SIZE);
1690 			int i;
1691 			bool found = false;
1692 			for (i = 0; i < 4; i++)
1693 				if (pt_vaddr[pte + i] != scratch_pte)
1694 					found = true;
1695 			if (!found)
1696 				continue;
1697 
1698 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1699 			for (i = 0; i < 4; i++) {
1700 				if (pt_vaddr[pte + i] != scratch_pte)
1701 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1702 				else
1703 					seq_puts(m, "  SCRATCH ");
1704 			}
1705 			seq_puts(m, "\n");
1706 		}
1707 		kunmap_atomic(pt_vaddr);
1708 	}
1709 }
1710 
1711 /* Write pde (index) from the page directory @pd to the page table @pt */
1712 static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
1713 				  const unsigned int pde,
1714 				  const struct i915_page_table *pt)
1715 {
1716 	/* Caller needs to make sure the write completes if necessary */
1717 	writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1718 		       ppgtt->pd_addr + pde);
1719 }
1720 
/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories.
 */
1723 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
1724 				  u32 start, u32 length)
1725 {
1726 	struct i915_page_table *pt;
1727 	unsigned int pde;
1728 
1729 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
1730 		gen6_write_pde(ppgtt, pde, pt);
1731 
1732 	mark_tlbs_dirty(ppgtt);
1733 	wmb();
1734 }
1735 
1736 static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1737 {
1738 	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1739 	return ppgtt->pd.base.ggtt_offset << 10;
1740 }
1741 
1742 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1743 			 struct drm_i915_gem_request *req)
1744 {
1745 	struct intel_engine_cs *engine = req->engine;
1746 	u32 *cs;
1747 
1748 	/* NB: TLBs must be flushed and invalidated before a switch */
1749 	cs = intel_ring_begin(req, 6);
1750 	if (IS_ERR(cs))
1751 		return PTR_ERR(cs);
1752 
1753 	*cs++ = MI_LOAD_REGISTER_IMM(2);
1754 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1755 	*cs++ = PP_DIR_DCLV_2G;
1756 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1757 	*cs++ = get_pd_offset(ppgtt);
1758 	*cs++ = MI_NOOP;
1759 	intel_ring_advance(req, cs);
1760 
1761 	return 0;
1762 }
1763 
1764 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1765 			  struct drm_i915_gem_request *req)
1766 {
1767 	struct intel_engine_cs *engine = req->engine;
1768 	u32 *cs;
1769 
1770 	/* NB: TLBs must be flushed and invalidated before a switch */
1771 	cs = intel_ring_begin(req, 6);
1772 	if (IS_ERR(cs))
1773 		return PTR_ERR(cs);
1774 
1775 	*cs++ = MI_LOAD_REGISTER_IMM(2);
1776 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1777 	*cs++ = PP_DIR_DCLV_2G;
1778 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1779 	*cs++ = get_pd_offset(ppgtt);
1780 	*cs++ = MI_NOOP;
1781 	intel_ring_advance(req, cs);
1782 
1783 	return 0;
1784 }
1785 
1786 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1787 			  struct drm_i915_gem_request *req)
1788 {
1789 	struct intel_engine_cs *engine = req->engine;
1790 	struct drm_i915_private *dev_priv = req->i915;
1791 
1792 	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1793 	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1794 	return 0;
1795 }
1796 
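/* Enable PPGTT (selecting 48b addressing where used) in each engine's
 * GFX_MODE (RING_MODE_GEN7) register.
 */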
1797 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1798 {
1799 	struct intel_engine_cs *engine;
1800 	enum intel_engine_id id;
1801 
1802 	for_each_engine(engine, dev_priv, id) {
1803 		u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1804 				 GEN8_GFX_PPGTT_48B : 0;
1805 		I915_WRITE(RING_MODE_GEN7(engine),
1806 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1807 	}
1808 }
1809 
1810 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1811 {
1812 	struct intel_engine_cs *engine;
1813 	u32 ecochk, ecobits;
1814 	enum intel_engine_id id;
1815 
1816 	ecobits = I915_READ(GAC_ECO_BITS);
1817 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1818 
1819 	ecochk = I915_READ(GAM_ECOCHK);
1820 	if (IS_HASWELL(dev_priv)) {
1821 		ecochk |= ECOCHK_PPGTT_WB_HSW;
1822 	} else {
1823 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1824 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1825 	}
1826 	I915_WRITE(GAM_ECOCHK, ecochk);
1827 
1828 	for_each_engine(engine, dev_priv, id) {
1829 		/* GFX_MODE is per-ring on gen7+ */
1830 		I915_WRITE(RING_MODE_GEN7(engine),
1831 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1832 	}
1833 }
1834 
1835 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1836 {
1837 	u32 ecochk, gab_ctl, ecobits;
1838 
1839 	ecobits = I915_READ(GAC_ECO_BITS);
1840 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1841 		   ECOBITS_PPGTT_CACHE64B);
1842 
1843 	gab_ctl = I915_READ(GAB_CTL);
1844 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1845 
1846 	ecochk = I915_READ(GAM_ECOCHK);
1847 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1848 
1849 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1850 }
1851 
/* PPGTT support for Sandybridge/Gen6 and later */
1853 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1854 				   u64 start, u64 length)
1855 {
1856 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1857 	unsigned int first_entry = start >> PAGE_SHIFT;
1858 	unsigned int pde = first_entry / GEN6_PTES;
1859 	unsigned int pte = first_entry % GEN6_PTES;
1860 	unsigned int num_entries = length >> PAGE_SHIFT;
1861 	gen6_pte_t scratch_pte =
1862 		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1863 
1864 	while (num_entries) {
1865 		struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
1866 		unsigned int end = min(pte + num_entries, GEN6_PTES);
1867 		gen6_pte_t *vaddr;
1868 
1869 		num_entries -= end - pte;
1870 
		/* Note that the hw doesn't support removing PDEs on the fly
1872 		 * (they are cached inside the context with no means to
1873 		 * invalidate the cache), so we can only reset the PTE
1874 		 * entries back to scratch.
1875 		 */
1876 
1877 		vaddr = kmap_atomic_px(pt);
1878 		do {
1879 			vaddr[pte++] = scratch_pte;
1880 		} while (pte < end);
1881 		kunmap_atomic(vaddr);
1882 
1883 		pte = 0;
1884 	}
1885 }
1886 
1887 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1888 				      struct i915_vma *vma,
1889 				      enum i915_cache_level cache_level,
1890 				      u32 flags)
1891 {
1892 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1893 	unsigned first_entry = vma->node.start >> PAGE_SHIFT;
1894 	unsigned act_pt = first_entry / GEN6_PTES;
1895 	unsigned act_pte = first_entry % GEN6_PTES;
1896 	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1897 	struct sgt_dma iter = sgt_dma(vma);
1898 	gen6_pte_t *vaddr;
1899 
1900 	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1901 	do {
1902 		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1903 
1904 		iter.dma += PAGE_SIZE;
1905 		if (iter.dma == iter.max) {
1906 			iter.sg = __sg_next(iter.sg);
1907 			if (!iter.sg)
1908 				break;
1909 
1910 			iter.dma = sg_dma_address(iter.sg);
1911 			iter.max = iter.dma + iter.sg->length;
1912 		}
1913 
1914 		if (++act_pte == GEN6_PTES) {
1915 			kunmap_atomic(vaddr);
1916 			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1917 			act_pte = 0;
1918 		}
1919 	} while (1);
1920 	kunmap_atomic(vaddr);
1921 
1922 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1923 }
1924 
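/* Make sure page tables exist for [start, start + length), writing any newly
 * allocated PDEs into the GGTT-resident page directory and marking the TLBs
 * dirty only if something was actually added.
 */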
1925 static int gen6_alloc_va_range(struct i915_address_space *vm,
1926 			       u64 start, u64 length)
1927 {
1928 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1929 	struct i915_page_table *pt;
1930 	u64 from = start;
1931 	unsigned int pde;
1932 	bool flush = false;
1933 
1934 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1935 		if (pt == vm->scratch_pt) {
1936 			pt = alloc_pt(vm);
1937 			if (IS_ERR(pt))
1938 				goto unwind_out;
1939 
1940 			gen6_initialize_pt(vm, pt);
1941 			ppgtt->pd.page_table[pde] = pt;
1942 			gen6_write_pde(ppgtt, pde, pt);
1943 			flush = true;
1944 		}
1945 	}
1946 
1947 	if (flush) {
1948 		mark_tlbs_dirty(ppgtt);
1949 		wmb();
1950 	}
1951 
1952 	return 0;
1953 
1954 unwind_out:
1955 	gen6_ppgtt_clear_range(vm, from, start);
1956 	return -ENOMEM;
1957 }
1958 
1959 static int gen6_init_scratch(struct i915_address_space *vm)
1960 {
1961 	int ret;
1962 
1963 	ret = setup_scratch_page(vm, I915_GFP_DMA);
1964 	if (ret)
1965 		return ret;
1966 
1967 	vm->scratch_pt = alloc_pt(vm);
1968 	if (IS_ERR(vm->scratch_pt)) {
1969 		cleanup_scratch_page(vm);
1970 		return PTR_ERR(vm->scratch_pt);
1971 	}
1972 
1973 	gen6_initialize_pt(vm, vm->scratch_pt);
1974 
1975 	return 0;
1976 }
1977 
1978 static void gen6_free_scratch(struct i915_address_space *vm)
1979 {
1980 	free_pt(vm, vm->scratch_pt);
1981 	cleanup_scratch_page(vm);
1982 }
1983 
1984 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1985 {
1986 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1987 	struct i915_page_directory *pd = &ppgtt->pd;
1988 	struct i915_page_table *pt;
1989 	u32 pde;
1990 
1991 	drm_mm_remove_node(&ppgtt->node);
1992 
1993 	gen6_for_all_pdes(pt, pd, pde)
1994 		if (pt != vm->scratch_pt)
1995 			free_pt(vm, pt);
1996 
1997 	gen6_free_scratch(vm);
1998 }
1999 
2000 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2001 {
2002 	struct i915_address_space *vm = &ppgtt->base;
2003 	struct drm_i915_private *dev_priv = ppgtt->base.i915;
2004 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2005 	int ret;
2006 
2007 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2008 	 * allocator works in address space sizes, so it's multiplied by page
2009 	 * size. We allocate at the top of the GTT to avoid fragmentation.
2010 	 */
2011 	BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2012 
2013 	ret = gen6_init_scratch(vm);
2014 	if (ret)
2015 		return ret;
2016 
2017 	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
2018 				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
2019 				  I915_COLOR_UNEVICTABLE,
2020 				  0, ggtt->base.total,
2021 				  PIN_HIGH);
2022 	if (ret)
2023 		goto err_out;
2024 
2025 	if (ppgtt->node.start < ggtt->mappable_end)
2026 		DRM_DEBUG("Forced to use aperture for PDEs\n");
2027 
2028 	ppgtt->pd.base.ggtt_offset =
2029 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2030 
2031 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2032 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2033 
2034 	return 0;
2035 
2036 err_out:
2037 	gen6_free_scratch(vm);
2038 	return ret;
2039 }
2040 
2041 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2042 {
2043 	return gen6_ppgtt_allocate_page_directories(ppgtt);
2044 }
2045 
2046 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2047 				  u64 start, u64 length)
2048 {
2049 	struct i915_page_table *unused;
2050 	u32 pde;
2051 
2052 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2053 		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2054 }
2055 
2056 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2057 {
2058 	struct drm_i915_private *dev_priv = ppgtt->base.i915;
2059 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2060 	int ret;
2061 
2062 	ppgtt->base.pte_encode = ggtt->base.pte_encode;
2063 	if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
2064 		ppgtt->switch_mm = gen6_mm_switch;
2065 	else if (IS_HASWELL(dev_priv))
2066 		ppgtt->switch_mm = hsw_mm_switch;
2067 	else if (IS_GEN7(dev_priv))
2068 		ppgtt->switch_mm = gen7_mm_switch;
2069 	else
2070 		BUG();
2071 
2072 	ret = gen6_ppgtt_alloc(ppgtt);
2073 	if (ret)
2074 		return ret;
2075 
2076 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2077 
2078 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2079 	gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
2080 
2081 	ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
2082 	if (ret) {
2083 		gen6_ppgtt_cleanup(&ppgtt->base);
2084 		return ret;
2085 	}
2086 
2087 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2088 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2089 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2090 	ppgtt->base.bind_vma = ppgtt_bind_vma;
2091 	ppgtt->base.set_pages = ppgtt_set_pages;
2092 	ppgtt->base.clear_pages = clear_pages;
2093 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2094 	ppgtt->debug_dump = gen6_dump_ppgtt;
2095 
2096 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2097 			 ppgtt->node.size >> 20,
2098 			 ppgtt->node.start / PAGE_SIZE);
2099 
2100 	DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
2101 			 ppgtt->pd.base.ggtt_offset << 10);
2102 
2103 	return 0;
2104 }
2105 
2106 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2107 			   struct drm_i915_private *dev_priv)
2108 {
2109 	ppgtt->base.i915 = dev_priv;
2110 	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
2111 
2112 	if (INTEL_GEN(dev_priv) < 8)
2113 		return gen6_ppgtt_init(ppgtt);
2114 	else
2115 		return gen8_ppgtt_init(ppgtt);
2116 }
2117 
2118 static void i915_address_space_init(struct i915_address_space *vm,
2119 				    struct drm_i915_private *dev_priv,
2120 				    const char *name)
2121 {
2122 	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
2123 
2124 	drm_mm_init(&vm->mm, 0, vm->total);
2125 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
2126 
2127 	INIT_LIST_HEAD(&vm->active_list);
2128 	INIT_LIST_HEAD(&vm->inactive_list);
2129 	INIT_LIST_HEAD(&vm->unbound_list);
2130 
2131 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
2132 	pagevec_init(&vm->free_pages);
2133 }
2134 
2135 static void i915_address_space_fini(struct i915_address_space *vm)
2136 {
2137 	if (pagevec_count(&vm->free_pages))
2138 		vm_free_pages_release(vm, true);
2139 
2140 	i915_gem_timeline_fini(&vm->timeline);
2141 	drm_mm_takedown(&vm->mm);
2142 	list_del(&vm->global_link);
2143 }
2144 
2145 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2146 {
2147 	/* This function applies GTT related workarounds. It is called on
2148 	 * driver load and after a GPU reset, so workarounds placed here are
2149 	 * re-applied even if they get overwritten by a GPU reset.
2150 	 */
2151 	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
2152 	if (IS_BROADWELL(dev_priv))
2153 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2154 	else if (IS_CHERRYVIEW(dev_priv))
2155 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2156 	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
2157 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2158 	else if (IS_GEN9_LP(dev_priv))
2159 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2160 
2161 	/*
2162 	 * To support 64K PTEs we need to first enable the use of the
2163 	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
2164 	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2165 	 * shouldn't be needed after GEN10.
2166 	 *
2167 	 * 64K pages were first introduced on BDW+, although technically they
2168 	 * only *work* from gen9 onwards. For pre-BDW we instead have the option
2169 	 * of 32K pages, but we don't currently have any support for them in
2170 	 * our driver.
2171 	 */
2172 	if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2173 	    INTEL_GEN(dev_priv) <= 10)
2174 		I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2175 			   I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2176 			   GAMW_ECO_ENABLE_64K_IPS_FIELD);
2177 }
2178 
2179 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2180 {
2181 	gtt_write_workarounds(dev_priv);
2182 
2183 	/* In the case of execlists, PPGTT is enabled by the context descriptor
2184 	 * and the PDPs are contained within the context itself.  We don't
2185 	 * need to do anything here. */
2186 	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
2187 		return 0;
2188 
2189 	if (!USES_PPGTT(dev_priv))
2190 		return 0;
2191 
2192 	if (IS_GEN6(dev_priv))
2193 		gen6_ppgtt_enable(dev_priv);
2194 	else if (IS_GEN7(dev_priv))
2195 		gen7_ppgtt_enable(dev_priv);
2196 	else if (INTEL_GEN(dev_priv) >= 8)
2197 		gen8_ppgtt_enable(dev_priv);
2198 	else
2199 		MISSING_CASE(INTEL_GEN(dev_priv));
2200 
2201 	return 0;
2202 }
2203 
2204 struct i915_hw_ppgtt *
2205 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2206 		  struct drm_i915_file_private *fpriv,
2207 		  const char *name)
2208 {
2209 	struct i915_hw_ppgtt *ppgtt;
2210 	int ret;
2211 
2212 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2213 	if (!ppgtt)
2214 		return ERR_PTR(-ENOMEM);
2215 
2216 	ret = __hw_ppgtt_init(ppgtt, dev_priv);
2217 	if (ret) {
2218 		kfree(ppgtt);
2219 		return ERR_PTR(ret);
2220 	}
2221 
2222 	kref_init(&ppgtt->ref);
2223 	i915_address_space_init(&ppgtt->base, dev_priv, name);
2224 	ppgtt->base.file = fpriv;
2225 
2226 	trace_i915_ppgtt_create(&ppgtt->base);
2227 
2228 	return ppgtt;
2229 }
2230 
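/* Mark the address space as closed and close every VMA still hanging off its
 * active, inactive and unbound lists.
 */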
2231 void i915_ppgtt_close(struct i915_address_space *vm)
2232 {
2233 	struct list_head *phases[] = {
2234 		&vm->active_list,
2235 		&vm->inactive_list,
2236 		&vm->unbound_list,
2237 		NULL,
2238 	}, **phase;
2239 
2240 	GEM_BUG_ON(vm->closed);
2241 	vm->closed = true;
2242 
2243 	for (phase = phases; *phase; phase++) {
2244 		struct i915_vma *vma, *vn;
2245 
2246 		list_for_each_entry_safe(vma, vn, *phase, vm_link)
2247 			if (!i915_vma_is_closed(vma))
2248 				i915_vma_close(vma);
2249 	}
2250 }
2251 
2252 void i915_ppgtt_release(struct kref *kref)
2253 {
2254 	struct i915_hw_ppgtt *ppgtt =
2255 		container_of(kref, struct i915_hw_ppgtt, ref);
2256 
2257 	trace_i915_ppgtt_release(&ppgtt->base);
2258 
2259 	/* vmas should already be unbound and destroyed */
2260 	WARN_ON(!list_empty(&ppgtt->base.active_list));
2261 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2262 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2263 
2264 	ppgtt->base.cleanup(&ppgtt->base);
2265 	i915_address_space_fini(&ppgtt->base);
2266 	kfree(ppgtt);
2267 }
2268 
2269 /* Certain Gen5 chipsets require idling the GPU before
2270  * unmapping anything from the GTT when VT-d is enabled.
2271  */
2272 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2273 {
2274 	/* Query intel_iommu to see if we need the workaround. Presumably that
2275 	 * was loaded first.
2276 	 */
2277 	return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2278 }
2279 
2280 static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv)
2281 {
2282 	struct intel_engine_cs *engine;
2283 	enum intel_engine_id id;
2284 	u32 fault;
2285 
2286 	for_each_engine(engine, dev_priv, id) {
2287 		fault = I915_READ(RING_FAULT_REG(engine));
2288 		if (fault & RING_FAULT_VALID) {
2289 			DRM_DEBUG_DRIVER("Unexpected fault\n"
2290 					 "\tAddr: 0x%08lx\n"
2291 					 "\tAddress space: %s\n"
2292 					 "\tSource ID: %d\n"
2293 					 "\tType: %d\n",
2294 					 fault & PAGE_MASK,
2295 					 fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2296 					 RING_FAULT_SRCID(fault),
2297 					 RING_FAULT_FAULT_TYPE(fault));
2298 			I915_WRITE(RING_FAULT_REG(engine),
2299 				   fault & ~RING_FAULT_VALID);
2300 		}
2301 	}
2302 
2303 	POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2304 }
2305 
2306 static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv)
2307 {
2308 	u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2309 
2310 	if (fault & RING_FAULT_VALID) {
2311 		u32 fault_data0, fault_data1;
2312 		u64 fault_addr;
2313 
2314 		fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
2315 		fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
2316 		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
2317 			     ((u64)fault_data0 << 12);
2318 
2319 		DRM_DEBUG_DRIVER("Unexpected fault\n"
2320 				 "\tAddr: 0x%08x_%08x\n"
2321 				 "\tAddress space: %s\n"
2322 				 "\tEngine ID: %d\n"
2323 				 "\tSource ID: %d\n"
2324 				 "\tType: %d\n",
2325 				 upper_32_bits(fault_addr),
2326 				 lower_32_bits(fault_addr),
2327 				 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
2328 				 GEN8_RING_FAULT_ENGINE_ID(fault),
2329 				 RING_FAULT_SRCID(fault),
2330 				 RING_FAULT_FAULT_TYPE(fault));
2331 		I915_WRITE(GEN8_RING_FAULT_REG,
2332 			   fault & ~RING_FAULT_VALID);
2333 	}
2334 
2335 	POSTING_READ(GEN8_RING_FAULT_REG);
2336 }
2337 
2338 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2339 {
2340 	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
2341 	if (INTEL_GEN(dev_priv) >= 8)
2342 		gen8_check_and_clear_faults(dev_priv);
2343 	else if (INTEL_GEN(dev_priv) >= 6)
2344 		gen6_check_and_clear_faults(dev_priv);
2347 }
2348 
2349 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2350 {
2351 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2352 
2353 	/* Don't bother messing with faults pre GEN6 as we have little
2354 	 * documentation supporting that it's a good idea.
2355 	 */
2356 	if (INTEL_GEN(dev_priv) < 6)
2357 		return;
2358 
2359 	i915_check_and_clear_faults(dev_priv);
2360 
2361 	ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
2362 
2363 	i915_ggtt_invalidate(dev_priv);
2364 }
2365 
2366 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2367 			       struct sg_table *pages)
2368 {
2369 	do {
2370 		if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2371 				     pages->sgl, pages->nents,
2372 				     PCI_DMA_BIDIRECTIONAL,
2373 				     DMA_ATTR_NO_WARN))
2374 			return 0;
2375 
2376 		/* If the DMA remap fails, one cause can be that we have
2377 		 * too many objects pinned in a small remapping table,
2378 		 * such as swiotlb. Incrementally purge all other objects and
2379 		 * try again - if there are no more pages to remove from
2380 		 * the DMA remapper, i915_gem_shrink will return 0.
2381 		 */
2382 		GEM_BUG_ON(obj->mm.pages == pages);
2383 	} while (i915_gem_shrink(to_i915(obj->base.dev),
2384 				 obj->base.size >> PAGE_SHIFT, NULL,
2385 				 I915_SHRINK_BOUND |
2386 				 I915_SHRINK_UNBOUND |
2387 				 I915_SHRINK_ACTIVE));
2388 
2389 	return -ENOSPC;
2390 }
2391 
2392 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2393 {
2394 	writeq(pte, addr);
2395 }
2396 
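/* Write a single PTE into the GGTT at the given byte offset and make the
 * update visible with a GGTT invalidate.
 */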
2397 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2398 				  dma_addr_t addr,
2399 				  u64 offset,
2400 				  enum i915_cache_level level,
2401 				  u32 unused)
2402 {
2403 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2404 	gen8_pte_t __iomem *pte =
2405 		(gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2406 
2407 	gen8_set_pte(pte, gen8_pte_encode(addr, level));
2408 
2409 	ggtt->invalidate(vm->i915);
2410 }
2411 
2412 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2413 				     struct i915_vma *vma,
2414 				     enum i915_cache_level level,
2415 				     u32 unused)
2416 {
2417 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2418 	struct sgt_iter sgt_iter;
2419 	gen8_pte_t __iomem *gtt_entries;
2420 	const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
2421 	dma_addr_t addr;
2422 
2423 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2424 	gtt_entries += vma->node.start >> PAGE_SHIFT;
2425 	for_each_sgt_dma(addr, sgt_iter, vma->pages)
2426 		gen8_set_pte(gtt_entries++, pte_encode | addr);
2427 
2428 	wmb();
2429 
2430 	/* The wmb() above ensures all the PTE writes have landed before we
2431 	 * trigger the GGTT invalidate below; we want to flush the TLBs only
2432 	 * after we're certain all the PTE updates have finished.
2433 	 */
2434 	ggtt->invalidate(vm->i915);
2435 }
2436 
2437 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2438 				  dma_addr_t addr,
2439 				  u64 offset,
2440 				  enum i915_cache_level level,
2441 				  u32 flags)
2442 {
2443 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2444 	gen6_pte_t __iomem *pte =
2445 		(gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2446 
2447 	iowrite32(vm->pte_encode(addr, level, flags), pte);
2448 
2449 	ggtt->invalidate(vm->i915);
2450 }
2451 
2452 /*
2453  * Binds an object into the global GTT with the specified cache level. The
2454  * object will be accessible to the GPU via commands whose operands reference
2455  * offsets within the global GTT, as well as accessible by the CPU through
2456  * the GMADR mapped BAR (dev_priv->mm.gtt->gtt).
2457  */
2458 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2459 				     struct i915_vma *vma,
2460 				     enum i915_cache_level level,
2461 				     u32 flags)
2462 {
2463 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2464 	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2465 	unsigned int i = vma->node.start >> PAGE_SHIFT;
2466 	struct sgt_iter iter;
2467 	dma_addr_t addr;
2468 	for_each_sgt_dma(addr, iter, vma->pages)
2469 		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2470 	wmb();
2471 
2472 	/* The wmb() above ensures all the PTE writes have landed before we
2473 	 * trigger the GGTT invalidate below; we want to flush the TLBs only
2474 	 * after we're certain all the PTE updates have finished.
2475 	 */
2476 	ggtt->invalidate(vm->i915);
2477 }
2478 
2479 static void nop_clear_range(struct i915_address_space *vm,
2480 			    u64 start, u64 length)
2481 {
2482 }
2483 
2484 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2485 				  u64 start, u64 length)
2486 {
2487 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2488 	unsigned first_entry = start >> PAGE_SHIFT;
2489 	unsigned num_entries = length >> PAGE_SHIFT;
2490 	const gen8_pte_t scratch_pte =
2491 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
2492 	gen8_pte_t __iomem *gtt_base =
2493 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2494 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2495 	int i;
2496 
2497 	if (WARN(num_entries > max_entries,
2498 		 "First entry = %d; Num entries = %d (max=%d)\n",
2499 		 first_entry, num_entries, max_entries))
2500 		num_entries = max_entries;
2501 
2502 	for (i = 0; i < num_entries; i++)
2503 		gen8_set_pte(&gtt_base[i], scratch_pte);
2504 }
2505 
2506 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2507 {
2508 	struct drm_i915_private *dev_priv = vm->i915;
2509 
2510 	/*
2511 	 * Make sure the internal GAM fifo has been cleared of all GTT
2512 	 * writes before exiting stop_machine(). This guarantees that
2513 	 * any aperture accesses waiting to start in another process
2514 	 * cannot back up behind the GTT writes causing a hang.
2515 	 * The register can be any arbitrary GAM register.
2516 	 */
2517 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2518 }
2519 
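/*
 * With VT-d enabled on BXT, GGTT updates can race with aperture accesses from
 * other processes and hang the machine. The *__BKL variants below therefore
 * funnel every GGTT update through stop_machine(), and bxt_vtd_ggtt_wa()
 * ensures the GAM fifo has drained before stop_machine() returns.
 */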
2520 struct insert_page {
2521 	struct i915_address_space *vm;
2522 	dma_addr_t addr;
2523 	u64 offset;
2524 	enum i915_cache_level level;
2525 };
2526 
2527 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2528 {
2529 	struct insert_page *arg = _arg;
2530 
2531 	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2532 	bxt_vtd_ggtt_wa(arg->vm);
2533 
2534 	return 0;
2535 }
2536 
2537 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2538 					  dma_addr_t addr,
2539 					  u64 offset,
2540 					  enum i915_cache_level level,
2541 					  u32 unused)
2542 {
2543 	struct insert_page arg = { vm, addr, offset, level };
2544 
2545 	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2546 }
2547 
2548 struct insert_entries {
2549 	struct i915_address_space *vm;
2550 	struct i915_vma *vma;
2551 	enum i915_cache_level level;
2552 };
2553 
2554 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2555 {
2556 	struct insert_entries *arg = _arg;
2557 
2558 	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
2559 	bxt_vtd_ggtt_wa(arg->vm);
2560 
2561 	return 0;
2562 }
2563 
2564 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2565 					     struct i915_vma *vma,
2566 					     enum i915_cache_level level,
2567 					     u32 unused)
2568 {
2569 	struct insert_entries arg = { vm, vma, level };
2570 
2571 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2572 }
2573 
2574 struct clear_range {
2575 	struct i915_address_space *vm;
2576 	u64 start;
2577 	u64 length;
2578 };
2579 
2580 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2581 {
2582 	struct clear_range *arg = _arg;
2583 
2584 	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2585 	bxt_vtd_ggtt_wa(arg->vm);
2586 
2587 	return 0;
2588 }
2589 
2590 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2591 					  u64 start,
2592 					  u64 length)
2593 {
2594 	struct clear_range arg = { vm, start, length };
2595 
2596 	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2597 }
2598 
2599 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2600 				  u64 start, u64 length)
2601 {
2602 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2603 	unsigned first_entry = start >> PAGE_SHIFT;
2604 	unsigned num_entries = length >> PAGE_SHIFT;
2605 	gen6_pte_t scratch_pte, __iomem *gtt_base =
2606 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2607 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2608 	int i;
2609 
2610 	if (WARN(num_entries > max_entries,
2611 		 "First entry = %d; Num entries = %d (max=%d)\n",
2612 		 first_entry, num_entries, max_entries))
2613 		num_entries = max_entries;
2614 
2615 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2616 				     I915_CACHE_LLC, 0);
2617 
2618 	for (i = 0; i < num_entries; i++)
2619 		iowrite32(scratch_pte, &gtt_base[i]);
2620 }
2621 
2622 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2623 				  dma_addr_t addr,
2624 				  u64 offset,
2625 				  enum i915_cache_level cache_level,
2626 				  u32 unused)
2627 {
2628 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2629 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2630 
2631 	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2632 }
2633 
2634 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2635 				     struct i915_vma *vma,
2636 				     enum i915_cache_level cache_level,
2637 				     u32 unused)
2638 {
2639 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2640 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2641 
2642 	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2643 				    flags);
2644 }
2645 
2646 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2647 				  u64 start, u64 length)
2648 {
2649 	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2650 }
2651 
2652 static int ggtt_bind_vma(struct i915_vma *vma,
2653 			 enum i915_cache_level cache_level,
2654 			 u32 flags)
2655 {
2656 	struct drm_i915_private *i915 = vma->vm->i915;
2657 	struct drm_i915_gem_object *obj = vma->obj;
2658 	u32 pte_flags;
2659 
2660 	/* Currently applicable only to VLV */
2661 	pte_flags = 0;
2662 	if (obj->gt_ro)
2663 		pte_flags |= PTE_READ_ONLY;
2664 
2665 	intel_runtime_pm_get(i915);
2666 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2667 	intel_runtime_pm_put(i915);
2668 
2669 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2670 
2671 	/*
2672 	 * Without aliasing PPGTT there's no difference between
2673 	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2674 	 * upgrade to both bound if we bind either to avoid double-binding.
2675 	 */
2676 	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2677 
2678 	return 0;
2679 }
2680 
2681 static void ggtt_unbind_vma(struct i915_vma *vma)
2682 {
2683 	struct drm_i915_private *i915 = vma->vm->i915;
2684 
2685 	intel_runtime_pm_get(i915);
2686 	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2687 	intel_runtime_pm_put(i915);
2688 }
2689 
2690 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2691 				 enum i915_cache_level cache_level,
2692 				 u32 flags)
2693 {
2694 	struct drm_i915_private *i915 = vma->vm->i915;
2695 	u32 pte_flags;
2696 	int ret;
2697 
2698 	/* Currently applicable only to VLV */
2699 	pte_flags = 0;
2700 	if (vma->obj->gt_ro)
2701 		pte_flags |= PTE_READ_ONLY;
2702 
2703 	if (flags & I915_VMA_LOCAL_BIND) {
2704 		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2705 
2706 		if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
2707 		    appgtt->base.allocate_va_range) {
2708 			ret = appgtt->base.allocate_va_range(&appgtt->base,
2709 							     vma->node.start,
2710 							     vma->size);
2711 			if (ret)
2712 				return ret;
2713 		}
2714 
2715 		appgtt->base.insert_entries(&appgtt->base, vma, cache_level,
2716 					    pte_flags);
2717 	}
2718 
2719 	if (flags & I915_VMA_GLOBAL_BIND) {
2720 		intel_runtime_pm_get(i915);
2721 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2722 		intel_runtime_pm_put(i915);
2723 	}
2724 
2725 	return 0;
2726 }
2727 
2728 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2729 {
2730 	struct drm_i915_private *i915 = vma->vm->i915;
2731 
2732 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
2733 		intel_runtime_pm_get(i915);
2734 		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2735 		intel_runtime_pm_put(i915);
2736 	}
2737 
2738 	if (vma->flags & I915_VMA_LOCAL_BIND) {
2739 		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
2740 
2741 		vm->clear_range(vm, vma->node.start, vma->size);
2742 	}
2743 }
2744 
2745 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2746 			       struct sg_table *pages)
2747 {
2748 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2749 	struct device *kdev = &dev_priv->drm.pdev->dev;
2750 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2751 
2752 	if (unlikely(ggtt->do_idle_maps)) {
2753 		if (i915_gem_wait_for_idle(dev_priv, 0)) {
2754 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2755 			/* Wait a bit, in hopes it avoids the hang */
2756 			udelay(10);
2757 		}
2758 	}
2759 
2760 	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2761 }
2762 
2763 static int ggtt_set_pages(struct i915_vma *vma)
2764 {
2765 	int ret;
2766 
2767 	GEM_BUG_ON(vma->pages);
2768 
2769 	ret = i915_get_ggtt_vma_pages(vma);
2770 	if (ret)
2771 		return ret;
2772 
2773 	vma->page_sizes = vma->obj->mm.page_sizes;
2774 
2775 	return 0;
2776 }
2777 
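/*
 * Colouring callback for the GGTT drm_mm: keep a one page gap between
 * neighbouring nodes of different colour so that prefetches from one object
 * (or past the end of the GTT) can never stray into its neighbour.
 */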
2778 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2779 				  unsigned long color,
2780 				  u64 *start,
2781 				  u64 *end)
2782 {
2783 	if (node->allocated && node->color != color)
2784 		*start += I915_GTT_PAGE_SIZE;
2785 
2786 	/* Also leave a space between the unallocated reserved node after the
2787 	 * GTT and any objects within the GTT, i.e. we use the color adjustment
2788 	 * to insert a guard page to prevent prefetches crossing over the
2789 	 * GTT boundary.
2790 	 */
2791 	node = list_next_entry(node, node_list);
2792 	if (node->color != color)
2793 		*end -= I915_GTT_PAGE_SIZE;
2794 }
2795 
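/* Create the single aliasing PPGTT that shadows the whole GGTT (used when full
 * PPGTT is not available), pre-allocate its page tables for the entire GGTT
 * range and redirect the GGTT bind/unbind hooks to the aliasing variants.
 */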
2796 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2797 {
2798 	struct i915_ggtt *ggtt = &i915->ggtt;
2799 	struct i915_hw_ppgtt *ppgtt;
2800 	int err;
2801 
2802 	ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
2803 	if (IS_ERR(ppgtt))
2804 		return PTR_ERR(ppgtt);
2805 
2806 	if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
2807 		err = -ENODEV;
2808 		goto err_ppgtt;
2809 	}
2810 
2811 	if (ppgtt->base.allocate_va_range) {
2812 		/* Note we only pre-allocate as far as the end of the global
2813 		 * GTT. On 48b / 4-level page-tables, the difference is very,
2814 		 * very significant! We have to preallocate as GVT/vgpu does
2815 		 * not like the page directory disappearing.
2816 		 */
2817 		err = ppgtt->base.allocate_va_range(&ppgtt->base,
2818 						    0, ggtt->base.total);
2819 		if (err)
2820 			goto err_ppgtt;
2821 	}
2822 
2823 	i915->mm.aliasing_ppgtt = ppgtt;
2824 
2825 	WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2826 	ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2827 
2828 	WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
2829 	ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
2830 
2831 	return 0;
2832 
2833 err_ppgtt:
2834 	i915_ppgtt_put(ppgtt);
2835 	return err;
2836 }
2837 
2838 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2839 {
2840 	struct i915_ggtt *ggtt = &i915->ggtt;
2841 	struct i915_hw_ppgtt *ppgtt;
2842 
2843 	ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2844 	if (!ppgtt)
2845 		return;
2846 
2847 	i915_ppgtt_put(ppgtt);
2848 
2849 	ggtt->base.bind_vma = ggtt_bind_vma;
2850 	ggtt->base.unbind_vma = ggtt_unbind_vma;
2851 }
2852 
2853 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2854 {
2855 	/* Let GEM Manage all of the aperture.
2856 	 *
2857 	 * However, leave one page at the end still bound to the scratch page.
2858 	 * There are a number of places where the hardware apparently prefetches
2859 	 * past the end of the object, and we've seen multiple hangs with the
2860 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2861 	 * aperture.  One page should be enough to keep any prefetching inside
2862 	 * of the aperture.
2863 	 */
2864 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2865 	unsigned long hole_start, hole_end;
2866 	struct drm_mm_node *entry;
2867 	int ret;
2868 
2869 	ret = intel_vgt_balloon(dev_priv);
2870 	if (ret)
2871 		return ret;
2872 
2873 	/* Reserve a mappable slot for our lockless error capture */
2874 	ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
2875 					  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2876 					  0, ggtt->mappable_end,
2877 					  DRM_MM_INSERT_LOW);
2878 	if (ret)
2879 		return ret;
2880 
2881 	/* Clear any non-preallocated blocks */
2882 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2883 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2884 			      hole_start, hole_end);
2885 		ggtt->base.clear_range(&ggtt->base, hole_start,
2886 				       hole_end - hole_start);
2887 	}
2888 
2889 	/* And finally clear the reserved guard page */
2890 	ggtt->base.clear_range(&ggtt->base,
2891 			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
2892 
2893 	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2894 		ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2895 		if (ret)
2896 			goto err;
2897 	}
2898 
2899 	return 0;
2900 
2901 err:
2902 	drm_mm_remove_node(&ggtt->error_capture);
2903 	return ret;
2904 }
2905 
2906 /**
2907  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2908  * @dev_priv: i915 device
2909  */
2910 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2911 {
2912 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2913 	struct i915_vma *vma, *vn;
2914 	struct pagevec *pvec;
2915 
2916 	ggtt->base.closed = true;
2917 
2918 	mutex_lock(&dev_priv->drm.struct_mutex);
2919 	WARN_ON(!list_empty(&ggtt->base.active_list));
2920 	list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
2921 		WARN_ON(i915_vma_unbind(vma));
2922 	mutex_unlock(&dev_priv->drm.struct_mutex);
2923 
2924 	i915_gem_cleanup_stolen(&dev_priv->drm);
2925 
2926 	mutex_lock(&dev_priv->drm.struct_mutex);
2927 	i915_gem_fini_aliasing_ppgtt(dev_priv);
2928 
2929 	if (drm_mm_node_allocated(&ggtt->error_capture))
2930 		drm_mm_remove_node(&ggtt->error_capture);
2931 
2932 	if (drm_mm_initialized(&ggtt->base.mm)) {
2933 		intel_vgt_deballoon(dev_priv);
2934 		i915_address_space_fini(&ggtt->base);
2935 	}
2936 
2937 	ggtt->base.cleanup(&ggtt->base);
2938 
2939 	pvec = &dev_priv->mm.wc_stash;
2940 	if (pvec->nr) {
2941 		set_pages_array_wb(pvec->pages, pvec->nr);
2942 		__pagevec_release(pvec);
2943 	}
2944 
2945 	mutex_unlock(&dev_priv->drm.struct_mutex);
2946 
2947 	arch_phys_wc_del(ggtt->mtrr);
2948 	io_mapping_fini(&ggtt->iomap);
2949 }
2950 
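/*
 * The GGMS field of the GMCH control word encodes the size of the GTT. Gen6/7
 * store the size directly in MiB, while gen8+ and CHV store an exponent
 * (2^n MiB), hence the separate decode helpers below.
 */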
2951 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2952 {
2953 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2954 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2955 	return snb_gmch_ctl << 20;
2956 }
2957 
2958 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2959 {
2960 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2961 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2962 	if (bdw_gmch_ctl)
2963 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2964 
2965 #ifdef CONFIG_X86_32
2966 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2967 	if (bdw_gmch_ctl > 4)
2968 		bdw_gmch_ctl = 4;
2969 #endif
2970 
2971 	return bdw_gmch_ctl << 20;
2972 }
2973 
2974 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2975 {
2976 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2977 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2978 
2979 	if (gmch_ctrl)
2980 		return 1 << (20 + gmch_ctrl);
2981 
2982 	return 0;
2983 }
2984 
2985 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2986 {
2987 	struct drm_i915_private *dev_priv = ggtt->base.i915;
2988 	struct pci_dev *pdev = dev_priv->drm.pdev;
2989 	phys_addr_t phys_addr;
2990 	int ret;
2991 
2992 	/* For Modern GENs the PTEs and register space are split in the BAR */
2993 	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2994 
2995 	/*
2996 	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
2997 	 * will be dropped. For WC mappings in general we have 64 byte burst
2998 	 * writes when the WC buffer is flushed, so we can't use it, but have to
2999 	 * resort to an uncached mapping. The WC issue is easily caught by the
3000 	 * readback check when writing GTT PTE entries.
3001 	 */
3002 	if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3003 		ggtt->gsm = ioremap_nocache(phys_addr, size);
3004 	else
3005 		ggtt->gsm = ioremap_wc(phys_addr, size);
3006 	if (!ggtt->gsm) {
3007 		DRM_ERROR("Failed to map the ggtt page table\n");
3008 		return -ENOMEM;
3009 	}
3010 
3011 	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
3012 	if (ret) {
3013 		DRM_ERROR("Scratch setup failed\n");
3014 		/* iounmap will also get called at remove, but meh */
3015 		iounmap(ggtt->gsm);
3016 		return ret;
3017 	}
3018 
3019 	return 0;
3020 }
3021 
3022 static struct intel_ppat_entry *
3023 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3024 {
3025 	struct intel_ppat_entry *entry = &ppat->entries[index];
3026 
3027 	GEM_BUG_ON(index >= ppat->max_entries);
3028 	GEM_BUG_ON(test_bit(index, ppat->used));
3029 
3030 	entry->ppat = ppat;
3031 	entry->value = value;
3032 	kref_init(&entry->ref);
3033 	set_bit(index, ppat->used);
3034 	set_bit(index, ppat->dirty);
3035 
3036 	return entry;
3037 }
3038 
3039 static void __free_ppat_entry(struct intel_ppat_entry *entry)
3040 {
3041 	struct intel_ppat *ppat = entry->ppat;
3042 	unsigned int index = entry - ppat->entries;
3043 
3044 	GEM_BUG_ON(index >= ppat->max_entries);
3045 	GEM_BUG_ON(!test_bit(index, ppat->used));
3046 
3047 	entry->value = ppat->clear_value;
3048 	clear_bit(index, ppat->used);
3049 	set_bit(index, ppat->dirty);
3050 }
3051 
3052 /**
3053  * intel_ppat_get - get a usable PPAT entry
3054  * @i915: i915 device instance
3055  * @value: the PPAT value required by the caller
3056  *
3057  * The function searches for an existing PPAT entry that matches the
3058  * required value. If a perfect match is found, the existing PPAT entry is
3059  * used. If only a partial match is found, it checks whether a free PPAT
3060  * index is available; if so, it allocates a new PPAT index for the required
3061  * value and updates the HW, otherwise the partially matched entry is used.
3062  * Returns the PPAT entry on success, or an ERR_PTR() on failure.
3063  */
3064 const struct intel_ppat_entry *
3065 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3066 {
3067 	struct intel_ppat *ppat = &i915->ppat;
3068 	struct intel_ppat_entry *entry = NULL;
3069 	unsigned int scanned, best_score;
3070 	int i;
3071 
3072 	GEM_BUG_ON(!ppat->max_entries);
3073 
3074 	scanned = best_score = 0;
3075 	for_each_set_bit(i, ppat->used, ppat->max_entries) {
3076 		unsigned int score;
3077 
3078 		score = ppat->match(ppat->entries[i].value, value);
3079 		if (score > best_score) {
3080 			entry = &ppat->entries[i];
3081 			if (score == INTEL_PPAT_PERFECT_MATCH) {
3082 				kref_get(&entry->ref);
3083 				return entry;
3084 			}
3085 			best_score = score;
3086 		}
3087 		scanned++;
3088 	}
3089 
3090 	if (scanned == ppat->max_entries) {
3091 		if (!entry)
3092 			return ERR_PTR(-ENOSPC);
3093 
3094 		kref_get(&entry->ref);
3095 		return entry;
3096 	}
3097 
3098 	i = find_first_zero_bit(ppat->used, ppat->max_entries);
3099 	entry = __alloc_ppat_entry(ppat, i, value);
3100 	ppat->update_hw(i915);
3101 	return entry;
3102 }
3103 
3104 static void release_ppat(struct kref *kref)
3105 {
3106 	struct intel_ppat_entry *entry =
3107 		container_of(kref, struct intel_ppat_entry, ref);
3108 	struct drm_i915_private *i915 = entry->ppat->i915;
3109 
3110 	__free_ppat_entry(entry);
3111 	entry->ppat->update_hw(i915);
3112 }
3113 
3114 /**
3115  * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
3116  * @entry: an intel PPAT entry
3117  *
3118  * Put back the PPAT entry obtained from intel_ppat_get(). If the PPAT index
3119  * of the entry is dynamically allocated, its reference count is decreased.
3120  * Once the reference count drops to zero, the PPAT index becomes free again.
3121  */
3122 void intel_ppat_put(const struct intel_ppat_entry *entry)
3123 {
3124 	struct intel_ppat *ppat = entry->ppat;
3125 	unsigned int index = entry - ppat->entries;
3126 
3127 	GEM_BUG_ON(!ppat->max_entries);
3128 
3129 	kref_put(&ppat->entries[index].ref, release_ppat);
3130 }
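
/*
 * A minimal usage sketch for the PPAT API above (the caller context and the
 * chosen WB value are illustrative only):
 *
 *	const struct intel_ppat_entry *entry;
 *	unsigned int index;
 *
 *	entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC);
 *	if (IS_ERR(entry))
 *		return PTR_ERR(entry);
 *
 *	index = entry - entry->ppat->entries;
 *	... use index as the PAT index when encoding PTEs ...
 *	intel_ppat_put(entry);
 */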
3131 
3132 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3133 {
3134 	struct intel_ppat *ppat = &dev_priv->ppat;
3135 	int i;
3136 
3137 	for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3138 		I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3139 		clear_bit(i, ppat->dirty);
3140 	}
3141 }
3142 
3143 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3144 {
3145 	struct intel_ppat *ppat = &dev_priv->ppat;
3146 	u64 pat = 0;
3147 	int i;
3148 
3149 	for (i = 0; i < ppat->max_entries; i++)
3150 		pat |= GEN8_PPAT(i, ppat->entries[i].value);
3151 
3152 	bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3153 
3154 	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3155 	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3156 }
3157 
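/* Score how well two BDW PPAT values match: the cache attribute must be
 * identical for any match at all, with matching target cache and age bits
 * improving the score up to INTEL_PPAT_PERFECT_MATCH when all three agree.
 */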
3158 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3159 {
3160 	unsigned int score = 0;
3161 	enum {
3162 		AGE_MATCH = BIT(0),
3163 		TC_MATCH = BIT(1),
3164 		CA_MATCH = BIT(2),
3165 	};
3166 
3167 	/* Cache attribute has to be matched. */
3168 	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3169 		return 0;
3170 
3171 	score |= CA_MATCH;
3172 
3173 	if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3174 		score |= TC_MATCH;
3175 
3176 	if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3177 		score |= AGE_MATCH;
3178 
3179 	if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3180 		return INTEL_PPAT_PERFECT_MATCH;
3181 
3182 	return score;
3183 }
3184 
3185 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3186 {
3187 	return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3188 		INTEL_PPAT_PERFECT_MATCH : 0;
3189 }
3190 
3191 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3192 {
3193 	ppat->max_entries = 8;
3194 	ppat->update_hw = cnl_private_pat_update_hw;
3195 	ppat->match = bdw_private_pat_match;
3196 	ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3197 
3198 	__alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3199 	__alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3200 	__alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3201 	__alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3202 	__alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3203 	__alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3204 	__alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3205 	__alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3206 }
3207 
3208 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3209  * bits. When using advanced contexts each context stores its own PAT, but
3210  * writing this data shouldn't be harmful even in those cases. */
3211 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3212 {
3213 	ppat->max_entries = 8;
3214 	ppat->update_hw = bdw_private_pat_update_hw;
3215 	ppat->match = bdw_private_pat_match;
3216 	ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3217 
3218 	if (!USES_PPGTT(ppat->i915)) {
3219 		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3220 		 * so RTL will always use the value corresponding to
3221 		 * pat_sel = 000".
3222 		 * So let's disable cache for GGTT to avoid screen corruptions.
3223 		 * MOCS still can be used though.
3224 		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3225 		 * before this patch, i.e. the same uncached + snooping access
3226 		 * like on gen6/7 seems to be in effect.
3227 		 * - So this just fixes blitter/render access. Again it looks
3228 		 * like it's not just uncached access, but uncached + snooping.
3229 		 * So we can still hold onto all our assumptions wrt cpu
3230 		 * clflushing on LLC machines.
3231 		 */
3232 		__alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3233 		return;
3234 	}
3235 
3236 	__alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3237 	__alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3238 	__alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3239 	__alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3240 	__alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3241 	__alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3242 	__alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3243 	__alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3244 }
3245 
3246 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3247 {
3248 	ppat->max_entries = 8;
3249 	ppat->update_hw = bdw_private_pat_update_hw;
3250 	ppat->match = chv_private_pat_match;
3251 	ppat->clear_value = CHV_PPAT_SNOOP;
3252 
3253 	/*
3254 	 * Map WB on BDW to snooped on CHV.
3255 	 *
3256 	 * Only the snoop bit has meaning for CHV, the rest is
3257 	 * ignored.
3258 	 *
3259 	 * The hardware will never snoop for certain types of accesses:
3260 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3261 	 * - PPGTT page tables
3262 	 * - some other special cycles
3263 	 *
3264 	 * As with BDW, we also need to consider the following for GT accesses:
3265 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3266 	 * so RTL will always use the value corresponding to
3267 	 * pat_sel = 000".
3268 	 * Which means we must set the snoop bit in PAT entry 0
3269 	 * in order to keep the global status page working.
3270 	 */
3271 
3272 	__alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3273 	__alloc_ppat_entry(ppat, 1, 0);
3274 	__alloc_ppat_entry(ppat, 2, 0);
3275 	__alloc_ppat_entry(ppat, 3, 0);
3276 	__alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3277 	__alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3278 	__alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3279 	__alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3280 }
3281 
3282 static void gen6_gmch_remove(struct i915_address_space *vm)
3283 {
3284 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3285 
3286 	iounmap(ggtt->gsm);
3287 	cleanup_scratch_page(vm);
3288 }
3289 
3290 static void setup_private_pat(struct drm_i915_private *dev_priv)
3291 {
3292 	struct intel_ppat *ppat = &dev_priv->ppat;
3293 	int i;
3294 
3295 	ppat->i915 = dev_priv;
3296 
3297 	if (INTEL_GEN(dev_priv) >= 10)
3298 		cnl_setup_private_ppat(ppat);
3299 	else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3300 		chv_setup_private_ppat(ppat);
3301 	else
3302 		bdw_setup_private_ppat(ppat);
3303 
3304 	GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3305 
3306 	for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3307 		ppat->entries[i].value = ppat->clear_value;
3308 		ppat->entries[i].ppat = ppat;
3309 		set_bit(i, ppat->dirty);
3310 	}
3311 
3312 	ppat->update_hw(dev_priv);
3313 }
3314 
3315 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3316 {
3317 	struct drm_i915_private *dev_priv = ggtt->base.i915;
3318 	struct pci_dev *pdev = dev_priv->drm.pdev;
3319 	unsigned int size;
3320 	u16 snb_gmch_ctl;
3321 	int err;
3322 
3323 	/* TODO: We're not aware of mappable constraints on gen8 yet */
3324 	ggtt->gmadr =
3325 		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3326 						 pci_resource_len(pdev, 2));
3327 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
3328 
3329 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3330 	if (!err)
3331 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3332 	if (err)
3333 		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3334 
3335 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3336 
3337 	if (IS_CHERRYVIEW(dev_priv))
3338 		size = chv_get_total_gtt_size(snb_gmch_ctl);
3339 	else
3340 		size = gen8_get_total_gtt_size(snb_gmch_ctl);
3344 
3345 	ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3346 	ggtt->base.cleanup = gen6_gmch_remove;
3347 	ggtt->base.bind_vma = ggtt_bind_vma;
3348 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3349 	ggtt->base.set_pages = ggtt_set_pages;
3350 	ggtt->base.clear_pages = clear_pages;
3351 	ggtt->base.insert_page = gen8_ggtt_insert_page;
3352 	ggtt->base.clear_range = nop_clear_range;
3353 	if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3354 		ggtt->base.clear_range = gen8_ggtt_clear_range;
3355 
3356 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3357 
3358 	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3359 	if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3360 		ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3361 		ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3362 		if (ggtt->base.clear_range != nop_clear_range)
3363 			ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3364 	}
3365 
3366 	ggtt->invalidate = gen6_ggtt_invalidate;
3367 
3368 	setup_private_pat(dev_priv);
3369 
3370 	return ggtt_probe_common(ggtt, size);
3371 }
3372 
3373 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3374 {
3375 	struct drm_i915_private *dev_priv = ggtt->base.i915;
3376 	struct pci_dev *pdev = dev_priv->drm.pdev;
3377 	unsigned int size;
3378 	u16 snb_gmch_ctl;
3379 	int err;
3380 
3381 	ggtt->gmadr =
3382 		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3383 						 pci_resource_len(pdev, 2));
3384 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
3385 
3386 	/* 64/512MB is the current min/max we actually know of, but this is just
3387 	 * a coarse sanity check.
3388 	 */
3389 	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3390 		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3391 		return -ENXIO;
3392 	}
3393 
3394 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3395 	if (!err)
3396 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3397 	if (err)
3398 		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3399 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3400 
3401 	size = gen6_get_total_gtt_size(snb_gmch_ctl);
3402 	ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3403 
3404 	ggtt->base.clear_range = gen6_ggtt_clear_range;
3405 	ggtt->base.insert_page = gen6_ggtt_insert_page;
3406 	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3407 	ggtt->base.bind_vma = ggtt_bind_vma;
3408 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3409 	ggtt->base.set_pages = ggtt_set_pages;
3410 	ggtt->base.clear_pages = clear_pages;
3411 	ggtt->base.cleanup = gen6_gmch_remove;
3412 
3413 	ggtt->invalidate = gen6_ggtt_invalidate;
3414 
3415 	if (HAS_EDRAM(dev_priv))
3416 		ggtt->base.pte_encode = iris_pte_encode;
3417 	else if (IS_HASWELL(dev_priv))
3418 		ggtt->base.pte_encode = hsw_pte_encode;
3419 	else if (IS_VALLEYVIEW(dev_priv))
3420 		ggtt->base.pte_encode = byt_pte_encode;
3421 	else if (INTEL_GEN(dev_priv) >= 7)
3422 		ggtt->base.pte_encode = ivb_pte_encode;
3423 	else
3424 		ggtt->base.pte_encode = snb_pte_encode;
3425 
3426 	return ggtt_probe_common(ggtt, size);
3427 }
3428 
3429 static void i915_gmch_remove(struct i915_address_space *vm)
3430 {
3431 	intel_gmch_remove();
3432 }
3433 
3434 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3435 {
3436 	struct drm_i915_private *dev_priv = ggtt->base.i915;
3437 	phys_addr_t gmadr_base;
3438 	int ret;
3439 
3440 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3441 	if (!ret) {
3442 		DRM_ERROR("failed to set up gmch\n");
3443 		return -EIO;
3444 	}
3445 
3446 	intel_gtt_get(&ggtt->base.total,
3447 		      &gmadr_base,
3448 		      &ggtt->mappable_end);
3449 
3450 	ggtt->gmadr =
3451 		(struct resource) DEFINE_RES_MEM(gmadr_base,
3452 						 ggtt->mappable_end);
3453 
3454 	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3455 	ggtt->base.insert_page = i915_ggtt_insert_page;
3456 	ggtt->base.insert_entries = i915_ggtt_insert_entries;
3457 	ggtt->base.clear_range = i915_ggtt_clear_range;
3458 	ggtt->base.bind_vma = ggtt_bind_vma;
3459 	ggtt->base.unbind_vma = ggtt_unbind_vma;
3460 	ggtt->base.set_pages = ggtt_set_pages;
3461 	ggtt->base.clear_pages = clear_pages;
3462 	ggtt->base.cleanup = i915_gmch_remove;
3463 
3464 	ggtt->invalidate = gmch_ggtt_invalidate;
3465 
3466 	if (unlikely(ggtt->do_idle_maps))
3467 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3468 
3469 	return 0;
3470 }
3471 
3472 /**
3473  * i915_ggtt_probe_hw - Probe GGTT hardware location
3474  * @dev_priv: i915 device
3475  */
3476 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3477 {
3478 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3479 	int ret;
3480 
3481 	ggtt->base.i915 = dev_priv;
3482 	ggtt->base.dma = &dev_priv->drm.pdev->dev;
3483 
3484 	if (INTEL_GEN(dev_priv) <= 5)
3485 		ret = i915_gmch_probe(ggtt);
3486 	else if (INTEL_GEN(dev_priv) < 8)
3487 		ret = gen6_gmch_probe(ggtt);
3488 	else
3489 		ret = gen8_gmch_probe(ggtt);
3490 	if (ret)
3491 		return ret;
3492 
3493 	/* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3494 	 * This is easier than doing range restriction on the fly, as we
3495 	 * currently don't have any bits spare to pass in this upper
3496 	 * restriction!
3497 	 */
3498 	if (USES_GUC(dev_priv)) {
3499 		ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
3500 		ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total);
3501 	}
3502 
3503 	if ((ggtt->base.total - 1) >> 32) {
3504 		DRM_ERROR("We never expected a Global GTT with more than 32bits"
3505 			  " of address space! Found %lldM!\n",
3506 			  ggtt->base.total >> 20);
3507 		ggtt->base.total = 1ULL << 32;
3508 		ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total);
3509 	}
3510 
3511 	if (ggtt->mappable_end > ggtt->base.total) {
3512 		DRM_ERROR("mappable aperture extends past end of GGTT,"
3513 			  " aperture=%pa, total=%llx\n",
3514 			  &ggtt->mappable_end, ggtt->base.total);
3515 		ggtt->mappable_end = ggtt->base.total;
3516 	}
3517 
3518 	/* GMADR is the PCI mmio aperture into the global GTT. */
3519 	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20);
3520 	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3521 	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3522 			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3523 	if (intel_vtd_active())
3524 		DRM_INFO("VT-d active for gfx access\n");
3525 
3526 	return 0;
3527 }
3528 
3529 /**
3530  * i915_ggtt_init_hw - Initialize GGTT hardware
3531  * @dev_priv: i915 device
3532  */
3533 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3534 {
3535 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3536 	int ret;
3537 
3538 	INIT_LIST_HEAD(&dev_priv->vm_list);
3539 
3540 	/* Note that we use page colouring to enforce a guard page at the
3541 	 * end of the address space. This is required as the CS may prefetch
3542 	 * beyond the end of the batch buffer, across the page boundary,
3543 	 * and beyond the end of the GTT if we do not provide a guard.
3544 	 */
3545 	mutex_lock(&dev_priv->drm.struct_mutex);
3546 	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
3547 	if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3548 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3549 	mutex_unlock(&dev_priv->drm.struct_mutex);
3550 
3551 	if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3552 				dev_priv->ggtt.gmadr.start,
3553 				dev_priv->ggtt.mappable_end)) {
3554 		ret = -EIO;
3555 		goto out_gtt_cleanup;
3556 	}
3557 
3558 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3559 
3560 	/*
3561 	 * Initialise stolen early so that we may reserve preallocated
3562 	 * objects for the BIOS to KMS transition.
3563 	 */
3564 	ret = i915_gem_init_stolen(dev_priv);
3565 	if (ret)
3566 		goto out_gtt_cleanup;
3567 
3568 	return 0;
3569 
3570 out_gtt_cleanup:
3571 	ggtt->base.cleanup(&ggtt->base);
3572 	return ret;
3573 }
3574 
3575 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3576 {
3577 	if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3578 		return -EIO;
3579 
3580 	return 0;
3581 }
3582 
3583 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3584 {
3585 	GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3586 
3587 	i915->ggtt.invalidate = guc_ggtt_invalidate;
3588 
3589 	i915_ggtt_invalidate(i915);
3590 }
3591 
3592 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3593 {
3594 	/* We should only be called after i915_ggtt_enable_guc() */
3595 	GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3596 
3597 	i915->ggtt.invalidate = gen6_ggtt_invalidate;
3598 
3599 	i915_ggtt_invalidate(i915);
3600 }
3601 
3602 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3603 {
3604 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3605 	struct drm_i915_gem_object *obj, *on;
3606 
3607 	i915_check_and_clear_faults(dev_priv);
3608 
3609 	/* First fill our portion of the GTT with scratch pages */
3610 	ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
3611 
3612 	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3613 
3614 	/* clflush objects bound into the GGTT and rebind them. */
3615 	list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) {
3616 		bool ggtt_bound = false;
3617 		struct i915_vma *vma;
3618 
3619 		for_each_ggtt_vma(vma, obj) {
3620 			if (!i915_vma_unbind(vma))
3621 				continue;
3622 
3623 			WARN_ON(i915_vma_bind(vma, obj->cache_level,
3624 					      PIN_UPDATE));
3625 			ggtt_bound = true;
3626 		}
3627 
3628 		if (ggtt_bound)
3629 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3630 	}
3631 
3632 	ggtt->base.closed = false;
3633 
3634 	if (INTEL_GEN(dev_priv) >= 8) {
3635 		struct intel_ppat *ppat = &dev_priv->ppat;
3636 
3637 		bitmap_set(ppat->dirty, 0, ppat->max_entries);
3638 		dev_priv->ppat.update_hw(dev_priv);
3639 		return;
3640 	}
3641 
3642 	if (USES_PPGTT(dev_priv)) {
3643 		struct i915_address_space *vm;
3644 
3645 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3646 			struct i915_hw_ppgtt *ppgtt;
3647 
3648 			if (i915_is_ggtt(vm))
3649 				ppgtt = dev_priv->mm.aliasing_ppgtt;
3650 			else
3651 				ppgtt = i915_vm_to_ppgtt(vm);
3652 
3653 			gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
3654 		}
3655 	}
3656 
3657 	i915_ggtt_invalidate(dev_priv);
3658 }
3659 
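/* Lay out the pages of one rotated plane: walk the source column by column and
 * emit each column bottom-up (src_idx starts at the last row and steps back by
 * one stride per row), producing the page order needed for the rotated GGTT
 * view of the plane.
 */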
3660 static struct scatterlist *
3661 rotate_pages(const dma_addr_t *in, unsigned int offset,
3662 	     unsigned int width, unsigned int height,
3663 	     unsigned int stride,
3664 	     struct sg_table *st, struct scatterlist *sg)
3665 {
3666 	unsigned int column, row;
3667 	unsigned int src_idx;
3668 
3669 	for (column = 0; column < width; column++) {
3670 		src_idx = stride * (height - 1) + column;
3671 		for (row = 0; row < height; row++) {
3672 			st->nents++;
3673 			/* We don't need the pages, but need to initialize
3674 			 * the entries so the sg list can be happily traversed.
3675 			 * All we need are the DMA addresses.
3676 			 */
3677 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
3678 			sg_dma_address(sg) = in[offset + src_idx];
3679 			sg_dma_len(sg) = PAGE_SIZE;
3680 			sg = sg_next(sg);
3681 			src_idx -= stride;
3682 		}
3683 	}
3684 
3685 	return sg;
3686 }

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
	unsigned int size = intel_rotation_info_size(rot_info);
	struct sgt_iter sgt_iter;
	dma_addr_t dma_addr;
	unsigned long i;
	dma_addr_t *page_addr_list;
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;

	/* Allocate a temporary list of source pages for random access. */
	page_addr_list = kvmalloc_array(n_pages,
					sizeof(dma_addr_t),
					GFP_KERNEL);
	if (!page_addr_list)
		return ERR_PTR(ret);

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	/* Populate source page list from the object. */
	i = 0;
	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
		page_addr_list[i++] = dma_addr;

	GEM_BUG_ON(i != n_pages);
	st->nents = 0;
	sg = st->sgl;

	for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	kvfree(page_addr_list);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	kvfree(page_addr_list);

	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

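/*
 * A partial view maps only view->partial.size pages of the object, starting
 * at page view->partial.offset; it is used, for example, to let the GTT
 * fault handler map an object that is too large for the mappable aperture a
 * chunk at a time.
 */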
static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(iter->length - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/* The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, the
	 * vma->pages must be regenerated as well. A simple rule is that
	 * vma->pages must only be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;

	default:
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);
		return -EINVAL;
	}

	ret = 0;
	if (unlikely(IS_ERR(vma->pages))) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}
	return ret;
}

/**
 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @offset: where to insert inside the GTT,
 *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
 *          (@offset + @size) must fit within the address space
 * @color: color to apply to the node; if this node is not from a VMA,
 *         color must be #I915_COLOR_UNEVICTABLE
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
 * the address space (using @size and @color). If the @node does not fit, it
 * tries to evict any overlapping nodes from the GTT, including any
 * neighbouring nodes if the colors do not match (to ensure guard pages between
 * differing domains). See i915_gem_evict_for_node() for the gory details
 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
 * evicting active overlapping objects, and any overlapping node that is pinned
 * or marked as unevictable will also result in failure.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
 */
int i915_gem_gtt_reserve(struct i915_address_space *vm,
			 struct drm_mm_node *node,
			 u64 size, u64 offset, unsigned long color,
			 unsigned int flags)
{
	int err;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(range_overflows(offset, size, vm->total));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	node->size = size;
	node->start = offset;
	node->color = color;

	err = drm_mm_reserve_node(&vm->mm, node);
	if (err != -ENOSPC)
		return err;

	if (flags & PIN_NOEVICT)
		return -ENOSPC;

	err = i915_gem_evict_for_node(vm, node, flags);
	if (err == 0)
		err = drm_mm_reserve_node(&vm->mm, node);

	return err;
}
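
/*
 * Minimal usage sketch (illustrative only; the desired_offset value is a
 * made-up placeholder, not lifted from a real caller):
 *
 *	err = i915_gem_gtt_reserve(vma->vm, &vma->node,
 *				   vma->size, desired_offset,
 *				   vma->obj->cache_level, PIN_NONBLOCK);
 *	if (err)
 *		return err;
 */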
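
/*
 * Pick a random offset, aligned to @align, such that [offset, offset + len)
 * lies entirely within [start, end). If there is no slack in the range, the
 * single aligned start position is returned.
 */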
static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
	u64 range, addr;

	GEM_BUG_ON(range_overflows(start, len, end));
	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));

	range = round_down(end - len, align) - round_up(start, align);
	if (range) {
		if (sizeof(unsigned long) == sizeof(u64)) {
			addr = get_random_long();
		} else {
			addr = get_random_int();
			if (range > U32_MAX) {
				addr <<= 32;
				addr |= get_random_int();
			}
		}
		div64_u64_rem(addr, range, &addr);
		start += addr;
	}

	return round_up(start, align);
}

/**
 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @alignment: required alignment of starting offset, may be 0 but
 *             if specified, this must be a power-of-two and at least
 *             #I915_GTT_MIN_ALIGNMENT
 * @color: color to apply to node
 * @start: start of any range restriction inside GTT (0 for all),
 *         must be #I915_GTT_PAGE_SIZE aligned
 * @end: end of any range restriction inside GTT (U64_MAX for all),
 *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_insert() first searches for an available hole into which
 * it can insert the node. The hole address is aligned to @alignment and
 * its @size must then fit entirely within the [@start, @end] bounds. The
 * nodes on either side of the hole must match @color, or else a guard page
 * will be inserted between the two nodes (or the node evicted). If no
 * suitable hole is found, first a victim is randomly selected and tested
 * for eviction, and if that fails the LRU list of objects within the GTT
 * is scanned to find the first set of replacement nodes to create the hole.
 * Those old overlapping nodes are evicted from the GTT (and so must be
 * rebound before any future use). Any node that is currently pinned cannot
 * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
 * active and #PIN_NONBLOCK is specified, that node is also skipped when
 * searching for an eviction candidate. See i915_gem_evict_something() for
 * the gory details on the eviction algorithm.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
 */
int i915_gem_gtt_insert(struct i915_address_space *vm,
			struct drm_mm_node *node,
			u64 size, u64 alignment, unsigned long color,
			u64 start, u64 end, unsigned int flags)
{
	enum drm_mm_insert_mode mode;
	u64 offset;
	int err;

	lockdep_assert_held(&vm->i915->drm.struct_mutex);
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(start >= end);
	GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	if (unlikely(range_overflows(start, size, end)))
		return -ENOSPC;

	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
		return -ENOSPC;

	mode = DRM_MM_INSERT_BEST;
	if (flags & PIN_HIGH)
		mode = DRM_MM_INSERT_HIGH;
	if (flags & PIN_MAPPABLE)
		mode = DRM_MM_INSERT_LOW;

	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
	 * so we know that we always have a minimum alignment of 4096.
	 * The drm_mm range manager is optimised to return results
	 * with zero alignment, so where possible use the optimal
	 * path.
	 */
	BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
	if (alignment <= I915_GTT_MIN_ALIGNMENT)
		alignment = 0;

	err = drm_mm_insert_node_in_range(&vm->mm, node,
					  size, alignment, color,
					  start, end, mode);
	if (err != -ENOSPC)
		return err;

	if (flags & PIN_NOEVICT)
		return -ENOSPC;

	/* No free space, pick a slot at random.
	 *
	 * There is a pathological case here using a GTT shared between
	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
	 *
	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
	 *         (64k objects)             (448k objects)
	 *
	 * Now imagine that the eviction LRU is ordered top-down (just because
	 * pathology meets real life), and that we need to evict an object to
	 * make room inside the aperture. The eviction scan then has to walk
	 * the 448k list before it finds one within range. And now imagine that
	 * it has to search for a new hole between every byte inside the memcpy,
	 * for several simultaneous clients.
	 *
	 * On a full-ppgtt system, if we have run out of available space, there
	 * will be lots and lots of objects in the eviction list! Again,
	 * searching that LRU list may be slow if we are also applying any
	 * range restrictions (e.g. restriction to low 4GiB) and so, for
	 * simplicity and similarity between the different GTTs, try the single
	 * random replacement first.
	 */
	offset = random_offset(start, end,
			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
	if (err != -ENOSPC)
		return err;

	/* Randomly selected placement is pinned, do a search */
	err = i915_gem_evict_something(vm, size, alignment, color,
				       start, end, flags);
	if (err)
		return err;

	return drm_mm_insert_node_in_range(&vm->mm, node,
					   size, alignment, color,
					   start, end, DRM_MM_INSERT_EVICT);
}
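
/*
 * Minimal usage sketch (illustrative only; node and size are hypothetical
 * values for a driver-internal allocation that is not backed by a VMA):
 *
 *	struct drm_mm_node node = {};
 *
 *	err = i915_gem_gtt_insert(&ggtt->base, &node,
 *				  size, 0, I915_COLOR_UNEVICTABLE,
 *				  0, ggtt->base.total, PIN_HIGH);
 */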

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#include "selftests/i915_gem_gtt.c"
#endif