// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
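		/*
		 * The pin count was cleared above, so the unbind below is
		 * expected to succeed; warn if it still fails.
		 */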
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VTD workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		/*
		 * If we need 64K minimum GTT pages for device local-memory,
		 * like on XEHPSDV, then we need to fail the allocation here,
		 * otherwise we can't safely support the insertion of
		 * local-memory pages for this vm, since the HW expects the
		 * correct physical alignment and size when the page-table is
		 * operating in 64K GTT mode, which includes any scratch PTEs,
		 * since userspace can still touch them.
		 */
		if (HAS_64K_PAGES(vm->i915))
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

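	/* Pin through the GGTT (PIN_GLOBAL) or a ppGTT (PIN_USER) as appropriate */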
	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif