// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * use the passed-in size as the page size, which should ensure it also
	 * has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}
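
/*
 * map_pt_dma() and map_pt_dma_locked() pin the paging-structure backing store
 * into the kernel address space: the former takes the object lock itself,
 * the latter expects the caller to already hold it. Both mark the object
 * unshrinkable so its backing pages stay resident while the GPU may use them.
 */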
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}
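
/*
 * Called when the last "open" reference to the vm is dropped: with the vm
 * mutex held, forcibly unbind and release any vma still left on the bound
 * list before the address space itself can be torn down.
 */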
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	INIT_WORK(&vm->release_work, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VT-d workaround uses stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory
		 * inside it through stop_machine().
		 *
		 * Add the annotation for this; the shrinker uses trylock.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
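
/*
 * Accessors for the CPU mapping, DMA address and backing page of a
 * paging-structure object.
 */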
void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}
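
/*
 * Allocate an internal object of at least @size bytes, make it CPU-cacheable
 * and return a vma for it in @vm, intended as a scratch buffer for reading
 * back GPU-written data; the _pinned() variant also pins the vma.
 */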
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif