// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

void stash_init(struct pagestash *stash)
{
	pagevec_init(&stash->pvec);
	spin_lock_init(&stash->lock);
}

static struct page *stash_pop_page(struct pagestash *stash)
{
	struct page *page = NULL;

	spin_lock(&stash->lock);
	if (likely(stash->pvec.nr))
		page = stash->pvec.pages[--stash->pvec.nr];
	spin_unlock(&stash->lock);

	return page;
}

static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
	unsigned int nr;

	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);

	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
	memcpy(stash->pvec.pages + stash->pvec.nr,
	       pvec->pages + pvec->nr - nr,
	       sizeof(pvec->pages[0]) * nr);
	stash->pvec.nr += nr;

	spin_unlock(&stash->lock);

	pvec->nr -= nr;
}
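
/*
 * Allocate a single page of page-table backing storage. Pages come from the
 * per-vm stash first and, for write-combined page tables, from the global WC
 * stash; only as a last resort do we call alloc_page() and, when WC is
 * required, batch-convert a whole pagevec so that the expensive
 * set_pages_array_wc() is amortized over many pages.
 */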
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec stack;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&stack);
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		stack.pages[stack.nr++] = page;
	} while (pagevec_space(&stack));

	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
		page = stack.pages[--stack.nr];

		/* Merge spare WC pages to the global stash */
		if (stack.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);

		/* Push any surplus WC pages onto the local VM stash */
		if (stack.nr)
			stash_push_pagevec(&vm->free_pages, &stack);
	}

	/* Return unwanted leftovers */
	if (unlikely(stack.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
		__pagevec_release(&stack);
	}

	return page;
}
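
/*
 * Flush the vm's local free_pages stash. When the page tables are kept
 * write-combined, surplus pages are pushed to the global WC stash first and
 * only the overflow is converted back to WB before being handed back to the
 * page allocator. Called with vm->free_pages.lock held.
 */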
static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages.pvec;
	struct pagevec stack;

	lockdep_assert_held(&vm->free_pages.lock);
	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		/*
		 * When we use WC, first fill up the global stash and then,
		 * only if that is full, immediately free the overflow.
		 */
		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
			return;

		/*
		 * We have to drop the lock to allow ourselves to sleep,
		 * so take a copy of the pvec and clear the stash so that
		 * others can use it while we sleep.
		 */
		stack = *pvec;
		pagevec_reinit(pvec);
		spin_unlock(&vm->free_pages.lock);

		pvec = &stack;
		set_pages_array_wb(pvec->pages, pvec->nr);

		spin_lock(&vm->free_pages.lock);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	spin_lock(&vm->free_pages.lock);
	while (!pagevec_space(&vm->free_pages.pvec))
		vm_free_pages_release(vm, false);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
	pagevec_add(&vm->free_pages.pvec, page);
	spin_unlock(&vm->free_pages.lock);
}

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	mutex_lock(&vm->mutex);
	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	mutex_unlock(&vm->mutex);
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	spin_lock(&vm->free_pages.lock);
	if (pagevec_count(&vm->free_pages.pvec))
		vm_free_pages_release(vm, true);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
	spin_unlock(&vm->free_pages.lock);

	drm_mm_takedown(&vm->mm);

	mutex_destroy(&vm->mutex);
}
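
/*
 * The final release of an address space is deferred to a worker:
 * i915_vm_release() below queues the rcu work, so the backend cleanup and
 * i915_address_space_fini() run in process context after an RCU grace period.
 */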
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	stash_init(&vm->free_pages);

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page_attrs(vm->dma,
				      p->page, 0, PAGE_SIZE,
				      PCI_DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC |
				      DMA_ATTR_NO_WARN);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}
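
/*
 * setup_page_dma() allocates one backing page (from highmem when possible)
 * and maps it for device access; cleanup_page_dma() undoes both steps.
 */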
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}

void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}

static void poison_scratch_page(struct page *page, unsigned long size)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	do {
		void *vaddr;

		vaddr = kmap(page);
		memset(vaddr, POISON_FREE, PAGE_SIZE);
		kunmap(page);

		page = pfn_to_page(page_to_pfn(page) + 1);
		size -= PAGE_SIZE;
	} while (size);
}
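
/*
 * Allocate the scratch page that unused PTEs point at. Where huge-gtt-pages
 * may be used we try for a 64K scratch page first, falling back to a single
 * 4K page if the larger allocation (or its alignment requirement) cannot be
 * satisfied.
 */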
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		size = I915_GTT_PAGE_SIZE_64K;
		gfp |= __GFP_NOWARN;
	}
	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;

	do {
		unsigned int order = get_order(size);
		struct page *page;
		dma_addr_t addr;

		page = alloc_pages(gfp, order);
		if (unlikely(!page))
			goto skip;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(page, size);

		addr = dma_map_page_attrs(vm->dma,
					  page, 0, size,
					  PCI_DMA_BIDIRECTIONAL,
					  DMA_ATTR_SKIP_CPU_SYNC |
					  DMA_ATTR_NO_WARN);
		if (unlikely(dma_mapping_error(vm->dma, addr)))
			goto free_page;

		if (unlikely(!IS_ALIGNED(addr, size)))
			goto unmap_page;

		vm->scratch[0].base.page = page;
		vm->scratch[0].base.daddr = addr;
		vm->scratch_order = order;
		return 0;

unmap_page:
		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
		__free_pages(page, order);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
		gfp &= ~__GFP_NOWARN;
	} while (1);
}

void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = px_base(&vm->scratch[0]);
	unsigned int order = vm->scratch_order;

	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, order);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}
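
/*
 * Apply the GTT-related workarounds that must be reprogrammed after every
 * GPU reset: the default TLB entry count, the 64K PTE (IPS) enable and the
 * GTT cache setting.
 */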
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so workarounds can be placed
	 * here even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages without disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}
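
/*
 * Encode a gen8+ page-table entry: the page address plus the present and
 * read/write bits, with the PPAT selection derived from the cache level.
 */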
u64 gen8_pte_encode(dma_addr_t addr,
		    enum i915_cache_level level,
		    u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~_PAGE_RW;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}
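
/*
 * Gen10+ parts program each PAT entry through its own register (the
 * GEN12_PAT_INDEX/GEN10_PAT_INDEX writes above); gen8/9 instead pack all
 * eight entries into the GEN8_PRIVATE_PAT_LO/HI register pair written below.
 */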
/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
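
/*
 * Program the private PAT using the routine appropriate for the platform:
 * per-index registers on gen12/gen10, the snoop-only table on Cherryview and
 * gen9 LP, and the packed register pair on other gen8/9 parts.
 */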
void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif