// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

void stash_init(struct pagestash *stash)
{
	pagevec_init(&stash->pvec);
	spin_lock_init(&stash->lock);
}

static struct page *stash_pop_page(struct pagestash *stash)
{
	struct page *page = NULL;

	spin_lock(&stash->lock);
	if (likely(stash->pvec.nr))
		page = stash->pvec.pages[--stash->pvec.nr];
	spin_unlock(&stash->lock);

	return page;
}

static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
	unsigned int nr;

	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);

	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
	memcpy(stash->pvec.pages + stash->pvec.nr,
	       pvec->pages + pvec->nr - nr,
	       sizeof(pvec->pages[0]) * nr);
	stash->pvec.nr += nr;

	spin_unlock(&stash->lock);

	pvec->nr -= nr;
}
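
/*
 * A rough usage sketch of the pagestash helpers above (illustrative only,
 * not part of the driver API): pages are popped one at a time, while a push
 * transfers as many pages as fit from a caller-owned pagevec and leaves the
 * remainder behind for the caller to deal with. stash_push_pagevec() takes
 * its lock with SINGLE_DEPTH_NESTING because it may be called while another
 * pagestash lock of the same class is already held (see
 * vm_free_pages_release(), which pushes into the global WC stash under
 * vm->free_pages.lock).
 *
 *	struct pagevec stack;
 *
 *	pagevec_init(&stack);
 *	while (pagevec_space(&stack) && (page = alloc_page(GFP_KERNEL)))
 *		stack.pages[stack.nr++] = page;
 *	stash_push_pagevec(&stash, &stack); // stack.nr pages may remain
 *	page = stash_pop_page(&stash);      // NULL if the stash is empty
 */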

static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec stack;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&stack);
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		stack.pages[stack.nr++] = page;
	} while (pagevec_space(&stack));

	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
		page = stack.pages[--stack.nr];

		/* Merge spare WC pages to the global stash */
		if (stack.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);

		/* Push any surplus WC pages onto the local VM stash */
		if (stack.nr)
			stash_push_pagevec(&vm->free_pages, &stack);
	}

	/* Return unwanted leftovers */
	if (unlikely(stack.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
		__pagevec_release(&stack);
	}

	return page;
}

static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages.pvec;
	struct pagevec stack;

	lockdep_assert_held(&vm->free_pages.lock);
	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		/*
		 * When we use WC, first fill up the global stash and then,
		 * only if that is full, immediately free the overflow.
		 */
		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
			return;

		/*
		 * We have to drop the lock to allow ourselves to sleep,
		 * so take a copy of the pvec and clear the stash for
		 * others to use while we sleep.
		 */
		stack = *pvec;
		pagevec_reinit(pvec);
		spin_unlock(&vm->free_pages.lock);

		pvec = &stack;
		set_pages_array_wb(pvec->pages, pvec->nr);

		spin_lock(&vm->free_pages.lock);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	spin_lock(&vm->free_pages.lock);
	while (!pagevec_space(&vm->free_pages.pvec))
		vm_free_pages_release(vm, false);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
	pagevec_add(&vm->free_pages.pvec, page);
	spin_unlock(&vm->free_pages.lock);
}
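
/*
 * Illustrative pairing only: pages handed out by vm_alloc_page() are
 * returned through vm_free_page(), which batches them in the per-VM stash
 * so the expensive WC->WB conversion happens in bulk. A caller sketch,
 * mirroring cleanup_page_dma() later in this file:
 *
 *	dma_unmap_page(vm->dma, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 *	vm_free_page(vm, page);
 */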

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	mutex_lock(&vm->mutex);
	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	mutex_unlock(&vm->mutex);
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	spin_lock(&vm->free_pages.lock);
	if (pagevec_count(&vm->free_pages.pvec))
		vm_free_pages_release(vm, true);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
	spin_unlock(&vm->free_pages.lock);

	drm_mm_takedown(&vm->mm);

	mutex_destroy(&vm->mutex);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
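
	/*
	 * Conceptually (a sketch only; the real implementation lives in
	 * i915_gem_shrinker_taints_mutex()), the "taint" above amounts to
	 * teaching lockdep that this mutex may be taken from within reclaim:
	 *
	 *	fs_reclaim_acquire(GFP_KERNEL);
	 *	mutex_acquire(&vm->mutex.dep_map, 0, 0, _RET_IP_);
	 *	mutex_release(&vm->mutex.dep_map, _RET_IP_);
	 *	fs_reclaim_release(GFP_KERNEL);
	 *
	 * so any later allocation made while holding vm->mutex produces an
	 * immediate lockdep splat instead of a rare deadlock in the shrinker.
	 */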

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	stash_init(&vm->free_pages);

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page_attrs(vm->dma,
				      p->page, 0, PAGE_SIZE,
				      PCI_DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC |
				      DMA_ATTR_NO_WARN);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}

int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}

void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}
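
/*
 * A non-authoritative usage sketch of the helpers above: a page-table page
 * is set up, seeded with scratch PTEs and eventually torn down again. The
 * local names here (pt, scratch_pte) are purely illustrative:
 *
 *	struct i915_page_dma pt;
 *	u64 scratch_pte = gen8_pte_encode(px_dma(&vm->scratch[0]),
 *					  I915_CACHE_LLC, 0);
 *
 *	if (setup_page_dma(vm, &pt))
 *		return -ENOMEM;
 *	fill_page_dma(&pt, scratch_pte, 512); // 512 x 8B PTEs per 4K page
 *	...
 *	cleanup_page_dma(vm, &pt);
 *
 * fill_page_dma() writes @count copies of the 64b @val via a temporary
 * atomic kmap, so the caller must not ask for more entries than fit in one
 * page.
 */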

static void poison_scratch_page(struct page *page, unsigned long size)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	do {
		void *vaddr;

		vaddr = kmap(page);
		memset(vaddr, POISON_FREE, PAGE_SIZE);
		kunmap(page);

		page = pfn_to_page(page_to_pfn(page) + 1);
		size -= PAGE_SIZE;
	} while (size);
}

int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64K scratch page
	 * for all.
	 */
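	/*
	 * Worked out (informally): in 64K mode a single PTE maps 64K of GTT,
	 * i.e. 64K / 4K = 16 of the usual 4K slots, so only every 16th entry
	 * of the page table is consulted and each must reference a 64K-sized,
	 * 64K-aligned region. That is why the allocation below also insists
	 * on IS_ALIGNED(addr, size) for the DMA address.
	 */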
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		size = I915_GTT_PAGE_SIZE_64K;
		gfp |= __GFP_NOWARN;
	}
	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;

	do {
		unsigned int order = get_order(size);
		struct page *page;
		dma_addr_t addr;

		page = alloc_pages(gfp, order);
		if (unlikely(!page))
			goto skip;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(page, size);

		addr = dma_map_page_attrs(vm->dma,
					  page, 0, size,
					  PCI_DMA_BIDIRECTIONAL,
					  DMA_ATTR_SKIP_CPU_SYNC |
					  DMA_ATTR_NO_WARN);
		if (unlikely(dma_mapping_error(vm->dma, addr)))
			goto free_page;

		if (unlikely(!IS_ALIGNED(addr, size)))
			goto unmap_page;

		vm->scratch[0].base.page = page;
		vm->scratch[0].base.daddr = addr;
		vm->scratch_order = order;
		return 0;

unmap_page:
		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
		__free_pages(page, order);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
		gfp &= ~__GFP_NOWARN;
	} while (1);
}

void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = px_base(&vm->scratch[0]);
	unsigned int order = vm->scratch_order;

	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, order);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced on BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages without disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

u64 gen8_pte_encode(dma_addr_t addr,
		    enum i915_cache_level level,
		    u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~_PAGE_RW;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}
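
/*
 * Worked example (illustrative values only): encoding a writable, LLC-cached
 * page at DMA address 0x1234000 is
 *
 *	gen8_pte_encode(0x1234000, I915_CACHE_LLC, 0)
 *		== 0x1234000 | _PAGE_PRESENT | _PAGE_RW | PPAT_CACHED
 *
 * while passing PTE_READ_ONLY in @flags clears _PAGE_RW again, and
 * I915_CACHE_NONE selects PPAT_UNCACHED instead of PPAT_CACHED.
 */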

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
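
/*
 * For reference (a sketch of the packing; see the GEN8_PPAT definition for
 * the authoritative form): each of the eight PAT entries occupies one byte
 * of the 64b value, roughly GEN8_PPAT(i, x) == (u64)(x) << (i * 8), which is
 * why the combined value is split across the _LO and _HI registers with
 * lower_32_bits()/upper_32_bits() above and in chv_setup_private_ppat()
 * below.
 */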

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif