1fe5db27dSBen Gardon // SPDX-License-Identifier: GPL-2.0 2fe5db27dSBen Gardon 302c00b3aSBen Gardon #include "mmu.h" 402c00b3aSBen Gardon #include "mmu_internal.h" 5bb18842eSBen Gardon #include "mmutrace.h" 62f2fad08SBen Gardon #include "tdp_iter.h" 7fe5db27dSBen Gardon #include "tdp_mmu.h" 802c00b3aSBen Gardon #include "spte.h" 9fe5db27dSBen Gardon 1033dd3574SBen Gardon #include <trace/events/kvm.h> 1133dd3574SBen Gardon 1295fb5b02SBen Gardon #ifdef CONFIG_X86_64 13fe5db27dSBen Gardon static bool __read_mostly tdp_mmu_enabled = false; 1495fb5b02SBen Gardon module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); 1595fb5b02SBen Gardon #endif 16fe5db27dSBen Gardon 17fe5db27dSBen Gardon static bool is_tdp_mmu_enabled(void) 18fe5db27dSBen Gardon { 19fe5db27dSBen Gardon #ifdef CONFIG_X86_64 20fe5db27dSBen Gardon return tdp_enabled && READ_ONCE(tdp_mmu_enabled); 21fe5db27dSBen Gardon #else 22fe5db27dSBen Gardon return false; 23fe5db27dSBen Gardon #endif /* CONFIG_X86_64 */ 24fe5db27dSBen Gardon } 25fe5db27dSBen Gardon 26fe5db27dSBen Gardon /* Initializes the TDP MMU for the VM, if enabled. */ 27fe5db27dSBen Gardon void kvm_mmu_init_tdp_mmu(struct kvm *kvm) 28fe5db27dSBen Gardon { 29fe5db27dSBen Gardon if (!is_tdp_mmu_enabled()) 30fe5db27dSBen Gardon return; 31fe5db27dSBen Gardon 32fe5db27dSBen Gardon /* This should not be changed for the lifetime of the VM. */ 33fe5db27dSBen Gardon kvm->arch.tdp_mmu_enabled = true; 3402c00b3aSBen Gardon 3502c00b3aSBen Gardon INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); 3689c0fd49SBen Gardon INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); 37fe5db27dSBen Gardon } 38fe5db27dSBen Gardon 39fe5db27dSBen Gardon void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) 40fe5db27dSBen Gardon { 41fe5db27dSBen Gardon if (!kvm->arch.tdp_mmu_enabled) 42fe5db27dSBen Gardon return; 4302c00b3aSBen Gardon 4402c00b3aSBen Gardon WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); 4502c00b3aSBen Gardon } 4602c00b3aSBen Gardon 47a889ea54SBen Gardon static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) 48a889ea54SBen Gardon { 49a889ea54SBen Gardon if (kvm_mmu_put_root(kvm, root)) 50a889ea54SBen Gardon kvm_tdp_mmu_free_root(kvm, root); 51a889ea54SBen Gardon } 52a889ea54SBen Gardon 53a889ea54SBen Gardon static inline bool tdp_mmu_next_root_valid(struct kvm *kvm, 54a889ea54SBen Gardon struct kvm_mmu_page *root) 55a889ea54SBen Gardon { 56a889ea54SBen Gardon lockdep_assert_held(&kvm->mmu_lock); 57a889ea54SBen Gardon 58a889ea54SBen Gardon if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) 59a889ea54SBen Gardon return false; 60a889ea54SBen Gardon 61a889ea54SBen Gardon kvm_mmu_get_root(kvm, root); 62a889ea54SBen Gardon return true; 63a889ea54SBen Gardon 64a889ea54SBen Gardon } 65a889ea54SBen Gardon 66a889ea54SBen Gardon static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, 67a889ea54SBen Gardon struct kvm_mmu_page *root) 68a889ea54SBen Gardon { 69a889ea54SBen Gardon struct kvm_mmu_page *next_root; 70a889ea54SBen Gardon 71a889ea54SBen Gardon next_root = list_next_entry(root, link); 72a889ea54SBen Gardon tdp_mmu_put_root(kvm, root); 73a889ea54SBen Gardon return next_root; 74a889ea54SBen Gardon } 75a889ea54SBen Gardon 76a889ea54SBen Gardon /* 77a889ea54SBen Gardon * Note: this iterator gets and puts references to the roots it iterates over. 78a889ea54SBen Gardon * This makes it safe to release the MMU lock and yield within the loop, but 79a889ea54SBen Gardon * if exiting the loop early, the caller must drop the reference to the most 80a889ea54SBen Gardon * recent root. (Unless keeping a live reference is desirable.) 81a889ea54SBen Gardon */ 82a889ea54SBen Gardon #define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ 83a889ea54SBen Gardon for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \ 84a889ea54SBen Gardon typeof(*_root), link); \ 85a889ea54SBen Gardon tdp_mmu_next_root_valid(_kvm, _root); \ 86a889ea54SBen Gardon _root = tdp_mmu_next_root(_kvm, _root)) 87a889ea54SBen Gardon 8802c00b3aSBen Gardon #define for_each_tdp_mmu_root(_kvm, _root) \ 8902c00b3aSBen Gardon list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) 9002c00b3aSBen Gardon 9102c00b3aSBen Gardon bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) 9202c00b3aSBen Gardon { 9302c00b3aSBen Gardon struct kvm_mmu_page *sp; 9402c00b3aSBen Gardon 95c887c9b9SPaolo Bonzini if (!kvm->arch.tdp_mmu_enabled) 96c887c9b9SPaolo Bonzini return false; 97c887c9b9SPaolo Bonzini if (WARN_ON(!VALID_PAGE(hpa))) 98c887c9b9SPaolo Bonzini return false; 99c887c9b9SPaolo Bonzini 10002c00b3aSBen Gardon sp = to_shadow_page(hpa); 101c887c9b9SPaolo Bonzini if (WARN_ON(!sp)) 102c887c9b9SPaolo Bonzini return false; 10302c00b3aSBen Gardon 10402c00b3aSBen Gardon return sp->tdp_mmu_page && sp->root_count; 10502c00b3aSBen Gardon } 10602c00b3aSBen Gardon 107faaf05b0SBen Gardon static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 108063afacdSBen Gardon gfn_t start, gfn_t end, bool can_yield); 109faaf05b0SBen Gardon 11002c00b3aSBen Gardon void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root) 11102c00b3aSBen Gardon { 112339f5a7fSRick Edgecombe gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); 113faaf05b0SBen Gardon 11402c00b3aSBen Gardon lockdep_assert_held(&kvm->mmu_lock); 11502c00b3aSBen Gardon 11602c00b3aSBen Gardon WARN_ON(root->root_count); 11702c00b3aSBen Gardon WARN_ON(!root->tdp_mmu_page); 11802c00b3aSBen Gardon 11902c00b3aSBen Gardon list_del(&root->link); 12002c00b3aSBen Gardon 121063afacdSBen Gardon zap_gfn_range(kvm, root, 0, max_gfn, false); 122faaf05b0SBen Gardon 12302c00b3aSBen Gardon free_page((unsigned long)root->spt); 12402c00b3aSBen Gardon kmem_cache_free(mmu_page_header_cache, root); 12502c00b3aSBen Gardon } 12602c00b3aSBen Gardon 12702c00b3aSBen Gardon static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu, 12802c00b3aSBen Gardon int level) 12902c00b3aSBen Gardon { 13002c00b3aSBen Gardon union kvm_mmu_page_role role; 13102c00b3aSBen Gardon 13202c00b3aSBen Gardon role = vcpu->arch.mmu->mmu_role.base; 13302c00b3aSBen Gardon role.level = level; 13402c00b3aSBen Gardon role.direct = true; 13502c00b3aSBen Gardon role.gpte_is_8_bytes = true; 13602c00b3aSBen Gardon role.access = ACC_ALL; 13702c00b3aSBen Gardon 13802c00b3aSBen Gardon return role; 13902c00b3aSBen Gardon } 14002c00b3aSBen Gardon 14102c00b3aSBen Gardon static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn, 14202c00b3aSBen Gardon int level) 14302c00b3aSBen Gardon { 14402c00b3aSBen Gardon struct kvm_mmu_page *sp; 14502c00b3aSBen Gardon 14602c00b3aSBen Gardon sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); 14702c00b3aSBen Gardon sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache); 14802c00b3aSBen Gardon set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 14902c00b3aSBen Gardon 15002c00b3aSBen Gardon sp->role.word = page_role_for_level(vcpu, level).word; 15102c00b3aSBen Gardon sp->gfn = gfn; 15202c00b3aSBen Gardon sp->tdp_mmu_page = true; 15302c00b3aSBen Gardon 15433dd3574SBen Gardon trace_kvm_mmu_get_page(sp, true); 15533dd3574SBen Gardon 15602c00b3aSBen Gardon return sp; 15702c00b3aSBen Gardon } 15802c00b3aSBen Gardon 15902c00b3aSBen Gardon static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu) 16002c00b3aSBen Gardon { 16102c00b3aSBen Gardon union kvm_mmu_page_role role; 16202c00b3aSBen Gardon struct kvm *kvm = vcpu->kvm; 16302c00b3aSBen Gardon struct kvm_mmu_page *root; 16402c00b3aSBen Gardon 16502c00b3aSBen Gardon role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level); 16602c00b3aSBen Gardon 16702c00b3aSBen Gardon spin_lock(&kvm->mmu_lock); 16802c00b3aSBen Gardon 16902c00b3aSBen Gardon /* Check for an existing root before allocating a new one. */ 17002c00b3aSBen Gardon for_each_tdp_mmu_root(kvm, root) { 17102c00b3aSBen Gardon if (root->role.word == role.word) { 17202c00b3aSBen Gardon kvm_mmu_get_root(kvm, root); 17302c00b3aSBen Gardon spin_unlock(&kvm->mmu_lock); 17402c00b3aSBen Gardon return root; 17502c00b3aSBen Gardon } 17602c00b3aSBen Gardon } 17702c00b3aSBen Gardon 17802c00b3aSBen Gardon root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level); 17902c00b3aSBen Gardon root->root_count = 1; 18002c00b3aSBen Gardon 18102c00b3aSBen Gardon list_add(&root->link, &kvm->arch.tdp_mmu_roots); 18202c00b3aSBen Gardon 18302c00b3aSBen Gardon spin_unlock(&kvm->mmu_lock); 18402c00b3aSBen Gardon 18502c00b3aSBen Gardon return root; 18602c00b3aSBen Gardon } 18702c00b3aSBen Gardon 18802c00b3aSBen Gardon hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) 18902c00b3aSBen Gardon { 19002c00b3aSBen Gardon struct kvm_mmu_page *root; 19102c00b3aSBen Gardon 19202c00b3aSBen Gardon root = get_tdp_mmu_vcpu_root(vcpu); 19302c00b3aSBen Gardon if (!root) 19402c00b3aSBen Gardon return INVALID_PAGE; 19502c00b3aSBen Gardon 19602c00b3aSBen Gardon return __pa(root->spt); 197fe5db27dSBen Gardon } 1982f2fad08SBen Gardon 1992f2fad08SBen Gardon static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, 2002f2fad08SBen Gardon u64 old_spte, u64 new_spte, int level); 2012f2fad08SBen Gardon 202faaf05b0SBen Gardon static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) 203faaf05b0SBen Gardon { 204faaf05b0SBen Gardon return sp->role.smm ? 1 : 0; 205faaf05b0SBen Gardon } 206faaf05b0SBen Gardon 207f8e14497SBen Gardon static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level) 208f8e14497SBen Gardon { 209f8e14497SBen Gardon bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); 210f8e14497SBen Gardon 211f8e14497SBen Gardon if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level)) 212f8e14497SBen Gardon return; 213f8e14497SBen Gardon 214f8e14497SBen Gardon if (is_accessed_spte(old_spte) && 215f8e14497SBen Gardon (!is_accessed_spte(new_spte) || pfn_changed)) 216f8e14497SBen Gardon kvm_set_pfn_accessed(spte_to_pfn(old_spte)); 217f8e14497SBen Gardon } 218f8e14497SBen Gardon 219a6a0b05dSBen Gardon static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, 220a6a0b05dSBen Gardon u64 old_spte, u64 new_spte, int level) 221a6a0b05dSBen Gardon { 222a6a0b05dSBen Gardon bool pfn_changed; 223a6a0b05dSBen Gardon struct kvm_memory_slot *slot; 224a6a0b05dSBen Gardon 225a6a0b05dSBen Gardon if (level > PG_LEVEL_4K) 226a6a0b05dSBen Gardon return; 227a6a0b05dSBen Gardon 228a6a0b05dSBen Gardon pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); 229a6a0b05dSBen Gardon 230a6a0b05dSBen Gardon if ((!is_writable_pte(old_spte) || pfn_changed) && 231a6a0b05dSBen Gardon is_writable_pte(new_spte)) { 232a6a0b05dSBen Gardon slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn); 233fb04a1edSPeter Xu mark_page_dirty_in_slot(kvm, slot, gfn); 234a6a0b05dSBen Gardon } 235a6a0b05dSBen Gardon } 236a6a0b05dSBen Gardon 2372f2fad08SBen Gardon /** 2382f2fad08SBen Gardon * handle_changed_spte - handle bookkeeping associated with an SPTE change 2392f2fad08SBen Gardon * @kvm: kvm instance 2402f2fad08SBen Gardon * @as_id: the address space of the paging structure the SPTE was a part of 2412f2fad08SBen Gardon * @gfn: the base GFN that was mapped by the SPTE 2422f2fad08SBen Gardon * @old_spte: The value of the SPTE before the change 2432f2fad08SBen Gardon * @new_spte: The value of the SPTE after the change 2442f2fad08SBen Gardon * @level: the level of the PT the SPTE is part of in the paging structure 2452f2fad08SBen Gardon * 2462f2fad08SBen Gardon * Handle bookkeeping that might result from the modification of a SPTE. 2472f2fad08SBen Gardon * This function must be called for all TDP SPTE modifications. 2482f2fad08SBen Gardon */ 2492f2fad08SBen Gardon static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, 2502f2fad08SBen Gardon u64 old_spte, u64 new_spte, int level) 2512f2fad08SBen Gardon { 2522f2fad08SBen Gardon bool was_present = is_shadow_present_pte(old_spte); 2532f2fad08SBen Gardon bool is_present = is_shadow_present_pte(new_spte); 2542f2fad08SBen Gardon bool was_leaf = was_present && is_last_spte(old_spte, level); 2552f2fad08SBen Gardon bool is_leaf = is_present && is_last_spte(new_spte, level); 2562f2fad08SBen Gardon bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); 2572f2fad08SBen Gardon u64 *pt; 25889c0fd49SBen Gardon struct kvm_mmu_page *sp; 2592f2fad08SBen Gardon u64 old_child_spte; 2602f2fad08SBen Gardon int i; 2612f2fad08SBen Gardon 2622f2fad08SBen Gardon WARN_ON(level > PT64_ROOT_MAX_LEVEL); 2632f2fad08SBen Gardon WARN_ON(level < PG_LEVEL_4K); 264764388ceSSean Christopherson WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); 2652f2fad08SBen Gardon 2662f2fad08SBen Gardon /* 2672f2fad08SBen Gardon * If this warning were to trigger it would indicate that there was a 2682f2fad08SBen Gardon * missing MMU notifier or a race with some notifier handler. 2692f2fad08SBen Gardon * A present, leaf SPTE should never be directly replaced with another 2702f2fad08SBen Gardon * present leaf SPTE pointing to a differnt PFN. A notifier handler 2712f2fad08SBen Gardon * should be zapping the SPTE before the main MM's page table is 2722f2fad08SBen Gardon * changed, or the SPTE should be zeroed, and the TLBs flushed by the 2732f2fad08SBen Gardon * thread before replacement. 2742f2fad08SBen Gardon */ 2752f2fad08SBen Gardon if (was_leaf && is_leaf && pfn_changed) { 2762f2fad08SBen Gardon pr_err("Invalid SPTE change: cannot replace a present leaf\n" 2772f2fad08SBen Gardon "SPTE with another present leaf SPTE mapping a\n" 2782f2fad08SBen Gardon "different PFN!\n" 2792f2fad08SBen Gardon "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", 2802f2fad08SBen Gardon as_id, gfn, old_spte, new_spte, level); 2812f2fad08SBen Gardon 2822f2fad08SBen Gardon /* 2832f2fad08SBen Gardon * Crash the host to prevent error propagation and guest data 2842f2fad08SBen Gardon * courruption. 2852f2fad08SBen Gardon */ 2862f2fad08SBen Gardon BUG(); 2872f2fad08SBen Gardon } 2882f2fad08SBen Gardon 2892f2fad08SBen Gardon if (old_spte == new_spte) 2902f2fad08SBen Gardon return; 2912f2fad08SBen Gardon 292b9a98c34SBen Gardon trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); 293b9a98c34SBen Gardon 2942f2fad08SBen Gardon /* 2952f2fad08SBen Gardon * The only times a SPTE should be changed from a non-present to 2962f2fad08SBen Gardon * non-present state is when an MMIO entry is installed/modified/ 2972f2fad08SBen Gardon * removed. In that case, there is nothing to do here. 2982f2fad08SBen Gardon */ 2992f2fad08SBen Gardon if (!was_present && !is_present) { 3002f2fad08SBen Gardon /* 3012f2fad08SBen Gardon * If this change does not involve a MMIO SPTE, it is 3022f2fad08SBen Gardon * unexpected. Log the change, though it should not impact the 3032f2fad08SBen Gardon * guest since both the former and current SPTEs are nonpresent. 3042f2fad08SBen Gardon */ 3052f2fad08SBen Gardon if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte))) 3062f2fad08SBen Gardon pr_err("Unexpected SPTE change! Nonpresent SPTEs\n" 3072f2fad08SBen Gardon "should not be replaced with another,\n" 3082f2fad08SBen Gardon "different nonpresent SPTE, unless one or both\n" 3092f2fad08SBen Gardon "are MMIO SPTEs.\n" 3102f2fad08SBen Gardon "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", 3112f2fad08SBen Gardon as_id, gfn, old_spte, new_spte, level); 3122f2fad08SBen Gardon return; 3132f2fad08SBen Gardon } 3142f2fad08SBen Gardon 3152f2fad08SBen Gardon 3162f2fad08SBen Gardon if (was_leaf && is_dirty_spte(old_spte) && 3172f2fad08SBen Gardon (!is_dirty_spte(new_spte) || pfn_changed)) 3182f2fad08SBen Gardon kvm_set_pfn_dirty(spte_to_pfn(old_spte)); 3192f2fad08SBen Gardon 3202f2fad08SBen Gardon /* 3212f2fad08SBen Gardon * Recursively handle child PTs if the change removed a subtree from 3222f2fad08SBen Gardon * the paging structure. 3232f2fad08SBen Gardon */ 3242f2fad08SBen Gardon if (was_present && !was_leaf && (pfn_changed || !is_present)) { 3252f2fad08SBen Gardon pt = spte_to_child_pt(old_spte, level); 32689c0fd49SBen Gardon sp = sptep_to_sp(pt); 32789c0fd49SBen Gardon 32833dd3574SBen Gardon trace_kvm_mmu_prepare_zap_page(sp); 32933dd3574SBen Gardon 33089c0fd49SBen Gardon list_del(&sp->link); 3312f2fad08SBen Gardon 33229cf0f50SBen Gardon if (sp->lpage_disallowed) 33329cf0f50SBen Gardon unaccount_huge_nx_page(kvm, sp); 33429cf0f50SBen Gardon 3352f2fad08SBen Gardon for (i = 0; i < PT64_ENT_PER_PAGE; i++) { 3362f2fad08SBen Gardon old_child_spte = READ_ONCE(*(pt + i)); 3372f2fad08SBen Gardon WRITE_ONCE(*(pt + i), 0); 3382f2fad08SBen Gardon handle_changed_spte(kvm, as_id, 3392f2fad08SBen Gardon gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)), 3402f2fad08SBen Gardon old_child_spte, 0, level - 1); 3412f2fad08SBen Gardon } 3422f2fad08SBen Gardon 3432f2fad08SBen Gardon kvm_flush_remote_tlbs_with_address(kvm, gfn, 3442f2fad08SBen Gardon KVM_PAGES_PER_HPAGE(level)); 3452f2fad08SBen Gardon 3462f2fad08SBen Gardon free_page((unsigned long)pt); 34789c0fd49SBen Gardon kmem_cache_free(mmu_page_header_cache, sp); 3482f2fad08SBen Gardon } 3492f2fad08SBen Gardon } 3502f2fad08SBen Gardon 3512f2fad08SBen Gardon static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, 3522f2fad08SBen Gardon u64 old_spte, u64 new_spte, int level) 3532f2fad08SBen Gardon { 3542f2fad08SBen Gardon __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level); 355f8e14497SBen Gardon handle_changed_spte_acc_track(old_spte, new_spte, level); 356a6a0b05dSBen Gardon handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, 357a6a0b05dSBen Gardon new_spte, level); 3582f2fad08SBen Gardon } 359faaf05b0SBen Gardon 360*fe43fa2fSBen Gardon /* 361*fe43fa2fSBen Gardon * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping 362*fe43fa2fSBen Gardon * @kvm: kvm instance 363*fe43fa2fSBen Gardon * @iter: a tdp_iter instance currently on the SPTE that should be set 364*fe43fa2fSBen Gardon * @new_spte: The value the SPTE should be set to 365*fe43fa2fSBen Gardon * @record_acc_track: Notify the MM subsystem of changes to the accessed state 366*fe43fa2fSBen Gardon * of the page. Should be set unless handling an MMU 367*fe43fa2fSBen Gardon * notifier for access tracking. Leaving record_acc_track 368*fe43fa2fSBen Gardon * unset in that case prevents page accesses from being 369*fe43fa2fSBen Gardon * double counted. 370*fe43fa2fSBen Gardon * @record_dirty_log: Record the page as dirty in the dirty bitmap if 371*fe43fa2fSBen Gardon * appropriate for the change being made. Should be set 372*fe43fa2fSBen Gardon * unless performing certain dirty logging operations. 373*fe43fa2fSBen Gardon * Leaving record_dirty_log unset in that case prevents page 374*fe43fa2fSBen Gardon * writes from being double counted. 375*fe43fa2fSBen Gardon */ 376f8e14497SBen Gardon static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, 377a6a0b05dSBen Gardon u64 new_spte, bool record_acc_track, 378a6a0b05dSBen Gardon bool record_dirty_log) 379faaf05b0SBen Gardon { 380faaf05b0SBen Gardon u64 *root_pt = tdp_iter_root_pt(iter); 381faaf05b0SBen Gardon struct kvm_mmu_page *root = sptep_to_sp(root_pt); 382faaf05b0SBen Gardon int as_id = kvm_mmu_page_as_id(root); 383faaf05b0SBen Gardon 384f8e14497SBen Gardon WRITE_ONCE(*iter->sptep, new_spte); 385faaf05b0SBen Gardon 386f8e14497SBen Gardon __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, 387faaf05b0SBen Gardon iter->level); 388f8e14497SBen Gardon if (record_acc_track) 389f8e14497SBen Gardon handle_changed_spte_acc_track(iter->old_spte, new_spte, 390f8e14497SBen Gardon iter->level); 391a6a0b05dSBen Gardon if (record_dirty_log) 392a6a0b05dSBen Gardon handle_changed_spte_dirty_log(kvm, as_id, iter->gfn, 393a6a0b05dSBen Gardon iter->old_spte, new_spte, 394a6a0b05dSBen Gardon iter->level); 395f8e14497SBen Gardon } 396f8e14497SBen Gardon 397f8e14497SBen Gardon static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, 398f8e14497SBen Gardon u64 new_spte) 399f8e14497SBen Gardon { 400a6a0b05dSBen Gardon __tdp_mmu_set_spte(kvm, iter, new_spte, true, true); 401f8e14497SBen Gardon } 402f8e14497SBen Gardon 403f8e14497SBen Gardon static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm, 404f8e14497SBen Gardon struct tdp_iter *iter, 405f8e14497SBen Gardon u64 new_spte) 406f8e14497SBen Gardon { 407a6a0b05dSBen Gardon __tdp_mmu_set_spte(kvm, iter, new_spte, false, true); 408a6a0b05dSBen Gardon } 409a6a0b05dSBen Gardon 410a6a0b05dSBen Gardon static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, 411a6a0b05dSBen Gardon struct tdp_iter *iter, 412a6a0b05dSBen Gardon u64 new_spte) 413a6a0b05dSBen Gardon { 414a6a0b05dSBen Gardon __tdp_mmu_set_spte(kvm, iter, new_spte, true, false); 415faaf05b0SBen Gardon } 416faaf05b0SBen Gardon 417faaf05b0SBen Gardon #define tdp_root_for_each_pte(_iter, _root, _start, _end) \ 418faaf05b0SBen Gardon for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end) 419faaf05b0SBen Gardon 420f8e14497SBen Gardon #define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end) \ 421f8e14497SBen Gardon tdp_root_for_each_pte(_iter, _root, _start, _end) \ 422f8e14497SBen Gardon if (!is_shadow_present_pte(_iter.old_spte) || \ 423f8e14497SBen Gardon !is_last_spte(_iter.old_spte, _iter.level)) \ 424f8e14497SBen Gardon continue; \ 425f8e14497SBen Gardon else 426f8e14497SBen Gardon 427bb18842eSBen Gardon #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \ 428bb18842eSBen Gardon for_each_tdp_pte(_iter, __va(_mmu->root_hpa), \ 429bb18842eSBen Gardon _mmu->shadow_root_level, _start, _end) 430bb18842eSBen Gardon 431faaf05b0SBen Gardon /* 432e28a436cSBen Gardon * Flush the TLB and yield if the MMU lock is contended or this thread needs to 433e28a436cSBen Gardon * return control to the scheduler. 434e28a436cSBen Gardon * 435e28a436cSBen Gardon * If this function yields, it will also reset the tdp_iter's walk over the 436e28a436cSBen Gardon * paging structure and the calling function should allow the iterator to 437e28a436cSBen Gardon * continue its traversal from the paging structure root. 438e28a436cSBen Gardon * 439e28a436cSBen Gardon * Return true if this function yielded, the TLBs were flushed, and the 440e28a436cSBen Gardon * iterator's traversal was reset. Return false if a yield was not needed. 441faaf05b0SBen Gardon */ 442faaf05b0SBen Gardon static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter) 443faaf05b0SBen Gardon { 444faaf05b0SBen Gardon if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { 445faaf05b0SBen Gardon kvm_flush_remote_tlbs(kvm); 446faaf05b0SBen Gardon cond_resched_lock(&kvm->mmu_lock); 447faaf05b0SBen Gardon tdp_iter_refresh_walk(iter); 448faaf05b0SBen Gardon return true; 449faaf05b0SBen Gardon } 450e28a436cSBen Gardon 451e28a436cSBen Gardon return false; 452faaf05b0SBen Gardon } 453faaf05b0SBen Gardon 454e28a436cSBen Gardon /* 455e28a436cSBen Gardon * Yield if the MMU lock is contended or this thread needs to return control 456e28a436cSBen Gardon * to the scheduler. 457e28a436cSBen Gardon * 458e28a436cSBen Gardon * If this function yields, it will also reset the tdp_iter's walk over the 459e28a436cSBen Gardon * paging structure and the calling function should allow the iterator to 460e28a436cSBen Gardon * continue its traversal from the paging structure root. 461e28a436cSBen Gardon * 462e28a436cSBen Gardon * Return true if this function yielded and the iterator's traversal was reset. 463e28a436cSBen Gardon * Return false if a yield was not needed. 464e28a436cSBen Gardon */ 465e28a436cSBen Gardon static bool tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter) 466a6a0b05dSBen Gardon { 467a6a0b05dSBen Gardon if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { 468a6a0b05dSBen Gardon cond_resched_lock(&kvm->mmu_lock); 469a6a0b05dSBen Gardon tdp_iter_refresh_walk(iter); 470e28a436cSBen Gardon return true; 471a6a0b05dSBen Gardon } 472e28a436cSBen Gardon 473e28a436cSBen Gardon return false; 474a6a0b05dSBen Gardon } 475a6a0b05dSBen Gardon 476faaf05b0SBen Gardon /* 477faaf05b0SBen Gardon * Tears down the mappings for the range of gfns, [start, end), and frees the 478faaf05b0SBen Gardon * non-root pages mapping GFNs strictly within that range. Returns true if 479faaf05b0SBen Gardon * SPTEs have been cleared and a TLB flush is needed before releasing the 480faaf05b0SBen Gardon * MMU lock. 481063afacdSBen Gardon * If can_yield is true, will release the MMU lock and reschedule if the 482063afacdSBen Gardon * scheduler needs the CPU or there is contention on the MMU lock. If this 483063afacdSBen Gardon * function cannot yield, it will not release the MMU lock or reschedule and 484063afacdSBen Gardon * the caller must ensure it does not supply too large a GFN range, or the 485063afacdSBen Gardon * operation can cause a soft lockup. 486faaf05b0SBen Gardon */ 487faaf05b0SBen Gardon static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 488063afacdSBen Gardon gfn_t start, gfn_t end, bool can_yield) 489faaf05b0SBen Gardon { 490faaf05b0SBen Gardon struct tdp_iter iter; 491faaf05b0SBen Gardon bool flush_needed = false; 492faaf05b0SBen Gardon 493faaf05b0SBen Gardon tdp_root_for_each_pte(iter, root, start, end) { 494faaf05b0SBen Gardon if (!is_shadow_present_pte(iter.old_spte)) 495faaf05b0SBen Gardon continue; 496faaf05b0SBen Gardon 497faaf05b0SBen Gardon /* 498faaf05b0SBen Gardon * If this is a non-last-level SPTE that covers a larger range 499faaf05b0SBen Gardon * than should be zapped, continue, and zap the mappings at a 500faaf05b0SBen Gardon * lower level. 501faaf05b0SBen Gardon */ 502faaf05b0SBen Gardon if ((iter.gfn < start || 503faaf05b0SBen Gardon iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && 504faaf05b0SBen Gardon !is_last_spte(iter.old_spte, iter.level)) 505faaf05b0SBen Gardon continue; 506faaf05b0SBen Gardon 507faaf05b0SBen Gardon tdp_mmu_set_spte(kvm, &iter, 0); 508faaf05b0SBen Gardon 509e28a436cSBen Gardon flush_needed = !can_yield || 510e28a436cSBen Gardon !tdp_mmu_iter_flush_cond_resched(kvm, &iter); 511faaf05b0SBen Gardon } 512faaf05b0SBen Gardon return flush_needed; 513faaf05b0SBen Gardon } 514faaf05b0SBen Gardon 515faaf05b0SBen Gardon /* 516faaf05b0SBen Gardon * Tears down the mappings for the range of gfns, [start, end), and frees the 517faaf05b0SBen Gardon * non-root pages mapping GFNs strictly within that range. Returns true if 518faaf05b0SBen Gardon * SPTEs have been cleared and a TLB flush is needed before releasing the 519faaf05b0SBen Gardon * MMU lock. 520faaf05b0SBen Gardon */ 521faaf05b0SBen Gardon bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) 522faaf05b0SBen Gardon { 523faaf05b0SBen Gardon struct kvm_mmu_page *root; 524faaf05b0SBen Gardon bool flush = false; 525faaf05b0SBen Gardon 526a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) 527063afacdSBen Gardon flush |= zap_gfn_range(kvm, root, start, end, true); 528faaf05b0SBen Gardon 529faaf05b0SBen Gardon return flush; 530faaf05b0SBen Gardon } 531faaf05b0SBen Gardon 532faaf05b0SBen Gardon void kvm_tdp_mmu_zap_all(struct kvm *kvm) 533faaf05b0SBen Gardon { 534339f5a7fSRick Edgecombe gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); 535faaf05b0SBen Gardon bool flush; 536faaf05b0SBen Gardon 537faaf05b0SBen Gardon flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn); 538faaf05b0SBen Gardon if (flush) 539faaf05b0SBen Gardon kvm_flush_remote_tlbs(kvm); 540faaf05b0SBen Gardon } 541bb18842eSBen Gardon 542bb18842eSBen Gardon /* 543bb18842eSBen Gardon * Installs a last-level SPTE to handle a TDP page fault. 544bb18842eSBen Gardon * (NPT/EPT violation/misconfiguration) 545bb18842eSBen Gardon */ 546bb18842eSBen Gardon static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, 547bb18842eSBen Gardon int map_writable, 548bb18842eSBen Gardon struct tdp_iter *iter, 549bb18842eSBen Gardon kvm_pfn_t pfn, bool prefault) 550bb18842eSBen Gardon { 551bb18842eSBen Gardon u64 new_spte; 552bb18842eSBen Gardon int ret = 0; 553bb18842eSBen Gardon int make_spte_ret = 0; 554bb18842eSBen Gardon 555bb18842eSBen Gardon if (unlikely(is_noslot_pfn(pfn))) { 556bb18842eSBen Gardon new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); 557bb18842eSBen Gardon trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte); 55833dd3574SBen Gardon } else { 559bb18842eSBen Gardon make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, 560bb18842eSBen Gardon pfn, iter->old_spte, prefault, true, 561bb18842eSBen Gardon map_writable, !shadow_accessed_mask, 562bb18842eSBen Gardon &new_spte); 56333dd3574SBen Gardon trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); 56433dd3574SBen Gardon } 565bb18842eSBen Gardon 566bb18842eSBen Gardon if (new_spte == iter->old_spte) 567bb18842eSBen Gardon ret = RET_PF_SPURIOUS; 568bb18842eSBen Gardon else 569bb18842eSBen Gardon tdp_mmu_set_spte(vcpu->kvm, iter, new_spte); 570bb18842eSBen Gardon 571bb18842eSBen Gardon /* 572bb18842eSBen Gardon * If the page fault was caused by a write but the page is write 573bb18842eSBen Gardon * protected, emulation is needed. If the emulation was skipped, 574bb18842eSBen Gardon * the vCPU would have the same fault again. 575bb18842eSBen Gardon */ 576bb18842eSBen Gardon if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { 577bb18842eSBen Gardon if (write) 578bb18842eSBen Gardon ret = RET_PF_EMULATE; 579bb18842eSBen Gardon kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 580bb18842eSBen Gardon } 581bb18842eSBen Gardon 582bb18842eSBen Gardon /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ 583bb18842eSBen Gardon if (unlikely(is_mmio_spte(new_spte))) 584bb18842eSBen Gardon ret = RET_PF_EMULATE; 585bb18842eSBen Gardon 586bb18842eSBen Gardon trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep); 587bb18842eSBen Gardon if (!prefault) 588bb18842eSBen Gardon vcpu->stat.pf_fixed++; 589bb18842eSBen Gardon 590bb18842eSBen Gardon return ret; 591bb18842eSBen Gardon } 592bb18842eSBen Gardon 593bb18842eSBen Gardon /* 594bb18842eSBen Gardon * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing 595bb18842eSBen Gardon * page tables and SPTEs to translate the faulting guest physical address. 596bb18842eSBen Gardon */ 597bb18842eSBen Gardon int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, 598bb18842eSBen Gardon int map_writable, int max_level, kvm_pfn_t pfn, 599bb18842eSBen Gardon bool prefault) 600bb18842eSBen Gardon { 601bb18842eSBen Gardon bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(); 602bb18842eSBen Gardon bool write = error_code & PFERR_WRITE_MASK; 603bb18842eSBen Gardon bool exec = error_code & PFERR_FETCH_MASK; 604bb18842eSBen Gardon bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled; 605bb18842eSBen Gardon struct kvm_mmu *mmu = vcpu->arch.mmu; 606bb18842eSBen Gardon struct tdp_iter iter; 60789c0fd49SBen Gardon struct kvm_mmu_page *sp; 608bb18842eSBen Gardon u64 *child_pt; 609bb18842eSBen Gardon u64 new_spte; 610bb18842eSBen Gardon int ret; 611bb18842eSBen Gardon gfn_t gfn = gpa >> PAGE_SHIFT; 612bb18842eSBen Gardon int level; 613bb18842eSBen Gardon int req_level; 614bb18842eSBen Gardon 615bb18842eSBen Gardon if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) 616bb18842eSBen Gardon return RET_PF_RETRY; 617bb18842eSBen Gardon if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))) 618bb18842eSBen Gardon return RET_PF_RETRY; 619bb18842eSBen Gardon 620bb18842eSBen Gardon level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn, 621bb18842eSBen Gardon huge_page_disallowed, &req_level); 622bb18842eSBen Gardon 623bb18842eSBen Gardon trace_kvm_mmu_spte_requested(gpa, level, pfn); 624bb18842eSBen Gardon tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { 625bb18842eSBen Gardon if (nx_huge_page_workaround_enabled) 626bb18842eSBen Gardon disallowed_hugepage_adjust(iter.old_spte, gfn, 627bb18842eSBen Gardon iter.level, &pfn, &level); 628bb18842eSBen Gardon 629bb18842eSBen Gardon if (iter.level == level) 630bb18842eSBen Gardon break; 631bb18842eSBen Gardon 632bb18842eSBen Gardon /* 633bb18842eSBen Gardon * If there is an SPTE mapping a large page at a higher level 634bb18842eSBen Gardon * than the target, that SPTE must be cleared and replaced 635bb18842eSBen Gardon * with a non-leaf SPTE. 636bb18842eSBen Gardon */ 637bb18842eSBen Gardon if (is_shadow_present_pte(iter.old_spte) && 638bb18842eSBen Gardon is_large_pte(iter.old_spte)) { 639bb18842eSBen Gardon tdp_mmu_set_spte(vcpu->kvm, &iter, 0); 640bb18842eSBen Gardon 641bb18842eSBen Gardon kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn, 642bb18842eSBen Gardon KVM_PAGES_PER_HPAGE(iter.level)); 643bb18842eSBen Gardon 644bb18842eSBen Gardon /* 645bb18842eSBen Gardon * The iter must explicitly re-read the spte here 646bb18842eSBen Gardon * because the new value informs the !present 647bb18842eSBen Gardon * path below. 648bb18842eSBen Gardon */ 649bb18842eSBen Gardon iter.old_spte = READ_ONCE(*iter.sptep); 650bb18842eSBen Gardon } 651bb18842eSBen Gardon 652bb18842eSBen Gardon if (!is_shadow_present_pte(iter.old_spte)) { 65389c0fd49SBen Gardon sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); 65489c0fd49SBen Gardon list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages); 65589c0fd49SBen Gardon child_pt = sp->spt; 656bb18842eSBen Gardon clear_page(child_pt); 657bb18842eSBen Gardon new_spte = make_nonleaf_spte(child_pt, 658bb18842eSBen Gardon !shadow_accessed_mask); 659bb18842eSBen Gardon 660bb18842eSBen Gardon trace_kvm_mmu_get_page(sp, true); 66129cf0f50SBen Gardon if (huge_page_disallowed && req_level >= iter.level) 66229cf0f50SBen Gardon account_huge_nx_page(vcpu->kvm, sp); 66329cf0f50SBen Gardon 664bb18842eSBen Gardon tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte); 665bb18842eSBen Gardon } 666bb18842eSBen Gardon } 667bb18842eSBen Gardon 668bb18842eSBen Gardon if (WARN_ON(iter.level != level)) 669bb18842eSBen Gardon return RET_PF_RETRY; 670bb18842eSBen Gardon 671bb18842eSBen Gardon ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter, 672bb18842eSBen Gardon pfn, prefault); 673bb18842eSBen Gardon 674bb18842eSBen Gardon return ret; 675bb18842eSBen Gardon } 676063afacdSBen Gardon 677063afacdSBen Gardon static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, 678063afacdSBen Gardon unsigned long end, unsigned long data, 679063afacdSBen Gardon int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot, 680063afacdSBen Gardon struct kvm_mmu_page *root, gfn_t start, 681063afacdSBen Gardon gfn_t end, unsigned long data)) 682063afacdSBen Gardon { 683063afacdSBen Gardon struct kvm_memslots *slots; 684063afacdSBen Gardon struct kvm_memory_slot *memslot; 685063afacdSBen Gardon struct kvm_mmu_page *root; 686063afacdSBen Gardon int ret = 0; 687063afacdSBen Gardon int as_id; 688063afacdSBen Gardon 689a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) { 690063afacdSBen Gardon as_id = kvm_mmu_page_as_id(root); 691063afacdSBen Gardon slots = __kvm_memslots(kvm, as_id); 692063afacdSBen Gardon kvm_for_each_memslot(memslot, slots) { 693063afacdSBen Gardon unsigned long hva_start, hva_end; 694063afacdSBen Gardon gfn_t gfn_start, gfn_end; 695063afacdSBen Gardon 696063afacdSBen Gardon hva_start = max(start, memslot->userspace_addr); 697063afacdSBen Gardon hva_end = min(end, memslot->userspace_addr + 698063afacdSBen Gardon (memslot->npages << PAGE_SHIFT)); 699063afacdSBen Gardon if (hva_start >= hva_end) 700063afacdSBen Gardon continue; 701063afacdSBen Gardon /* 702063afacdSBen Gardon * {gfn(page) | page intersects with [hva_start, hva_end)} = 703063afacdSBen Gardon * {gfn_start, gfn_start+1, ..., gfn_end-1}. 704063afacdSBen Gardon */ 705063afacdSBen Gardon gfn_start = hva_to_gfn_memslot(hva_start, memslot); 706063afacdSBen Gardon gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 707063afacdSBen Gardon 708063afacdSBen Gardon ret |= handler(kvm, memslot, root, gfn_start, 709063afacdSBen Gardon gfn_end, data); 710063afacdSBen Gardon } 711063afacdSBen Gardon } 712063afacdSBen Gardon 713063afacdSBen Gardon return ret; 714063afacdSBen Gardon } 715063afacdSBen Gardon 716063afacdSBen Gardon static int zap_gfn_range_hva_wrapper(struct kvm *kvm, 717063afacdSBen Gardon struct kvm_memory_slot *slot, 718063afacdSBen Gardon struct kvm_mmu_page *root, gfn_t start, 719063afacdSBen Gardon gfn_t end, unsigned long unused) 720063afacdSBen Gardon { 721063afacdSBen Gardon return zap_gfn_range(kvm, root, start, end, false); 722063afacdSBen Gardon } 723063afacdSBen Gardon 724063afacdSBen Gardon int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start, 725063afacdSBen Gardon unsigned long end) 726063afacdSBen Gardon { 727063afacdSBen Gardon return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0, 728063afacdSBen Gardon zap_gfn_range_hva_wrapper); 729063afacdSBen Gardon } 730f8e14497SBen Gardon 731f8e14497SBen Gardon /* 732f8e14497SBen Gardon * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero 733f8e14497SBen Gardon * if any of the GFNs in the range have been accessed. 734f8e14497SBen Gardon */ 735f8e14497SBen Gardon static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, 736f8e14497SBen Gardon struct kvm_mmu_page *root, gfn_t start, gfn_t end, 737f8e14497SBen Gardon unsigned long unused) 738f8e14497SBen Gardon { 739f8e14497SBen Gardon struct tdp_iter iter; 740f8e14497SBen Gardon int young = 0; 741f8e14497SBen Gardon u64 new_spte = 0; 742f8e14497SBen Gardon 743f8e14497SBen Gardon tdp_root_for_each_leaf_pte(iter, root, start, end) { 744f8e14497SBen Gardon /* 745f8e14497SBen Gardon * If we have a non-accessed entry we don't need to change the 746f8e14497SBen Gardon * pte. 747f8e14497SBen Gardon */ 748f8e14497SBen Gardon if (!is_accessed_spte(iter.old_spte)) 749f8e14497SBen Gardon continue; 750f8e14497SBen Gardon 751f8e14497SBen Gardon new_spte = iter.old_spte; 752f8e14497SBen Gardon 753f8e14497SBen Gardon if (spte_ad_enabled(new_spte)) { 754f8e14497SBen Gardon clear_bit((ffs(shadow_accessed_mask) - 1), 755f8e14497SBen Gardon (unsigned long *)&new_spte); 756f8e14497SBen Gardon } else { 757f8e14497SBen Gardon /* 758f8e14497SBen Gardon * Capture the dirty status of the page, so that it doesn't get 759f8e14497SBen Gardon * lost when the SPTE is marked for access tracking. 760f8e14497SBen Gardon */ 761f8e14497SBen Gardon if (is_writable_pte(new_spte)) 762f8e14497SBen Gardon kvm_set_pfn_dirty(spte_to_pfn(new_spte)); 763f8e14497SBen Gardon 764f8e14497SBen Gardon new_spte = mark_spte_for_access_track(new_spte); 765f8e14497SBen Gardon } 766a6a0b05dSBen Gardon new_spte &= ~shadow_dirty_mask; 767f8e14497SBen Gardon 768f8e14497SBen Gardon tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte); 769f8e14497SBen Gardon young = 1; 77033dd3574SBen Gardon 77133dd3574SBen Gardon trace_kvm_age_page(iter.gfn, iter.level, slot, young); 772f8e14497SBen Gardon } 773f8e14497SBen Gardon 774f8e14497SBen Gardon return young; 775f8e14497SBen Gardon } 776f8e14497SBen Gardon 777f8e14497SBen Gardon int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start, 778f8e14497SBen Gardon unsigned long end) 779f8e14497SBen Gardon { 780f8e14497SBen Gardon return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0, 781f8e14497SBen Gardon age_gfn_range); 782f8e14497SBen Gardon } 783f8e14497SBen Gardon 784f8e14497SBen Gardon static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 785f8e14497SBen Gardon struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused, 786f8e14497SBen Gardon unsigned long unused2) 787f8e14497SBen Gardon { 788f8e14497SBen Gardon struct tdp_iter iter; 789f8e14497SBen Gardon 790f8e14497SBen Gardon tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) 791f8e14497SBen Gardon if (is_accessed_spte(iter.old_spte)) 792f8e14497SBen Gardon return 1; 793f8e14497SBen Gardon 794f8e14497SBen Gardon return 0; 795f8e14497SBen Gardon } 796f8e14497SBen Gardon 797f8e14497SBen Gardon int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva) 798f8e14497SBen Gardon { 799f8e14497SBen Gardon return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0, 800f8e14497SBen Gardon test_age_gfn); 801f8e14497SBen Gardon } 8021d8dd6b3SBen Gardon 8031d8dd6b3SBen Gardon /* 8041d8dd6b3SBen Gardon * Handle the changed_pte MMU notifier for the TDP MMU. 8051d8dd6b3SBen Gardon * data is a pointer to the new pte_t mapping the HVA specified by the MMU 8061d8dd6b3SBen Gardon * notifier. 8071d8dd6b3SBen Gardon * Returns non-zero if a flush is needed before releasing the MMU lock. 8081d8dd6b3SBen Gardon */ 8091d8dd6b3SBen Gardon static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot, 8101d8dd6b3SBen Gardon struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused, 8111d8dd6b3SBen Gardon unsigned long data) 8121d8dd6b3SBen Gardon { 8131d8dd6b3SBen Gardon struct tdp_iter iter; 8141d8dd6b3SBen Gardon pte_t *ptep = (pte_t *)data; 8151d8dd6b3SBen Gardon kvm_pfn_t new_pfn; 8161d8dd6b3SBen Gardon u64 new_spte; 8171d8dd6b3SBen Gardon int need_flush = 0; 8181d8dd6b3SBen Gardon 8191d8dd6b3SBen Gardon WARN_ON(pte_huge(*ptep)); 8201d8dd6b3SBen Gardon 8211d8dd6b3SBen Gardon new_pfn = pte_pfn(*ptep); 8221d8dd6b3SBen Gardon 8231d8dd6b3SBen Gardon tdp_root_for_each_pte(iter, root, gfn, gfn + 1) { 8241d8dd6b3SBen Gardon if (iter.level != PG_LEVEL_4K) 8251d8dd6b3SBen Gardon continue; 8261d8dd6b3SBen Gardon 8271d8dd6b3SBen Gardon if (!is_shadow_present_pte(iter.old_spte)) 8281d8dd6b3SBen Gardon break; 8291d8dd6b3SBen Gardon 8301d8dd6b3SBen Gardon tdp_mmu_set_spte(kvm, &iter, 0); 8311d8dd6b3SBen Gardon 8321d8dd6b3SBen Gardon kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1); 8331d8dd6b3SBen Gardon 8341d8dd6b3SBen Gardon if (!pte_write(*ptep)) { 8351d8dd6b3SBen Gardon new_spte = kvm_mmu_changed_pte_notifier_make_spte( 8361d8dd6b3SBen Gardon iter.old_spte, new_pfn); 8371d8dd6b3SBen Gardon 8381d8dd6b3SBen Gardon tdp_mmu_set_spte(kvm, &iter, new_spte); 8391d8dd6b3SBen Gardon } 8401d8dd6b3SBen Gardon 8411d8dd6b3SBen Gardon need_flush = 1; 8421d8dd6b3SBen Gardon } 8431d8dd6b3SBen Gardon 8441d8dd6b3SBen Gardon if (need_flush) 8451d8dd6b3SBen Gardon kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); 8461d8dd6b3SBen Gardon 8471d8dd6b3SBen Gardon return 0; 8481d8dd6b3SBen Gardon } 8491d8dd6b3SBen Gardon 8501d8dd6b3SBen Gardon int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address, 8511d8dd6b3SBen Gardon pte_t *host_ptep) 8521d8dd6b3SBen Gardon { 8531d8dd6b3SBen Gardon return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1, 8541d8dd6b3SBen Gardon (unsigned long)host_ptep, 8551d8dd6b3SBen Gardon set_tdp_spte); 8561d8dd6b3SBen Gardon } 8571d8dd6b3SBen Gardon 858a6a0b05dSBen Gardon /* 859a6a0b05dSBen Gardon * Remove write access from all the SPTEs mapping GFNs [start, end). If 860a6a0b05dSBen Gardon * skip_4k is set, SPTEs that map 4k pages, will not be write-protected. 861a6a0b05dSBen Gardon * Returns true if an SPTE has been changed and the TLBs need to be flushed. 862a6a0b05dSBen Gardon */ 863a6a0b05dSBen Gardon static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 864a6a0b05dSBen Gardon gfn_t start, gfn_t end, int min_level) 865a6a0b05dSBen Gardon { 866a6a0b05dSBen Gardon struct tdp_iter iter; 867a6a0b05dSBen Gardon u64 new_spte; 868a6a0b05dSBen Gardon bool spte_set = false; 869a6a0b05dSBen Gardon 870a6a0b05dSBen Gardon BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); 871a6a0b05dSBen Gardon 872a6a0b05dSBen Gardon for_each_tdp_pte_min_level(iter, root->spt, root->role.level, 873a6a0b05dSBen Gardon min_level, start, end) { 874a6a0b05dSBen Gardon if (!is_shadow_present_pte(iter.old_spte) || 875a6a0b05dSBen Gardon !is_last_spte(iter.old_spte, iter.level)) 876a6a0b05dSBen Gardon continue; 877a6a0b05dSBen Gardon 878a6a0b05dSBen Gardon new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 879a6a0b05dSBen Gardon 880a6a0b05dSBen Gardon tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 881a6a0b05dSBen Gardon spte_set = true; 882a6a0b05dSBen Gardon 883a6a0b05dSBen Gardon tdp_mmu_iter_cond_resched(kvm, &iter); 884a6a0b05dSBen Gardon } 885a6a0b05dSBen Gardon return spte_set; 886a6a0b05dSBen Gardon } 887a6a0b05dSBen Gardon 888a6a0b05dSBen Gardon /* 889a6a0b05dSBen Gardon * Remove write access from all the SPTEs mapping GFNs in the memslot. Will 890a6a0b05dSBen Gardon * only affect leaf SPTEs down to min_level. 891a6a0b05dSBen Gardon * Returns true if an SPTE has been changed and the TLBs need to be flushed. 892a6a0b05dSBen Gardon */ 893a6a0b05dSBen Gardon bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, 894a6a0b05dSBen Gardon int min_level) 895a6a0b05dSBen Gardon { 896a6a0b05dSBen Gardon struct kvm_mmu_page *root; 897a6a0b05dSBen Gardon int root_as_id; 898a6a0b05dSBen Gardon bool spte_set = false; 899a6a0b05dSBen Gardon 900a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) { 901a6a0b05dSBen Gardon root_as_id = kvm_mmu_page_as_id(root); 902a6a0b05dSBen Gardon if (root_as_id != slot->as_id) 903a6a0b05dSBen Gardon continue; 904a6a0b05dSBen Gardon 905a6a0b05dSBen Gardon spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, 906a6a0b05dSBen Gardon slot->base_gfn + slot->npages, min_level); 907a6a0b05dSBen Gardon } 908a6a0b05dSBen Gardon 909a6a0b05dSBen Gardon return spte_set; 910a6a0b05dSBen Gardon } 911a6a0b05dSBen Gardon 912a6a0b05dSBen Gardon /* 913a6a0b05dSBen Gardon * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If 914a6a0b05dSBen Gardon * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. 915a6a0b05dSBen Gardon * If AD bits are not enabled, this will require clearing the writable bit on 916a6a0b05dSBen Gardon * each SPTE. Returns true if an SPTE has been changed and the TLBs need to 917a6a0b05dSBen Gardon * be flushed. 918a6a0b05dSBen Gardon */ 919a6a0b05dSBen Gardon static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 920a6a0b05dSBen Gardon gfn_t start, gfn_t end) 921a6a0b05dSBen Gardon { 922a6a0b05dSBen Gardon struct tdp_iter iter; 923a6a0b05dSBen Gardon u64 new_spte; 924a6a0b05dSBen Gardon bool spte_set = false; 925a6a0b05dSBen Gardon 926a6a0b05dSBen Gardon tdp_root_for_each_leaf_pte(iter, root, start, end) { 927a6a0b05dSBen Gardon if (spte_ad_need_write_protect(iter.old_spte)) { 928a6a0b05dSBen Gardon if (is_writable_pte(iter.old_spte)) 929a6a0b05dSBen Gardon new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 930a6a0b05dSBen Gardon else 931a6a0b05dSBen Gardon continue; 932a6a0b05dSBen Gardon } else { 933a6a0b05dSBen Gardon if (iter.old_spte & shadow_dirty_mask) 934a6a0b05dSBen Gardon new_spte = iter.old_spte & ~shadow_dirty_mask; 935a6a0b05dSBen Gardon else 936a6a0b05dSBen Gardon continue; 937a6a0b05dSBen Gardon } 938a6a0b05dSBen Gardon 939a6a0b05dSBen Gardon tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 940a6a0b05dSBen Gardon spte_set = true; 941a6a0b05dSBen Gardon 942a6a0b05dSBen Gardon tdp_mmu_iter_cond_resched(kvm, &iter); 943a6a0b05dSBen Gardon } 944a6a0b05dSBen Gardon return spte_set; 945a6a0b05dSBen Gardon } 946a6a0b05dSBen Gardon 947a6a0b05dSBen Gardon /* 948a6a0b05dSBen Gardon * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If 949a6a0b05dSBen Gardon * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. 950a6a0b05dSBen Gardon * If AD bits are not enabled, this will require clearing the writable bit on 951a6a0b05dSBen Gardon * each SPTE. Returns true if an SPTE has been changed and the TLBs need to 952a6a0b05dSBen Gardon * be flushed. 953a6a0b05dSBen Gardon */ 954a6a0b05dSBen Gardon bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 955a6a0b05dSBen Gardon { 956a6a0b05dSBen Gardon struct kvm_mmu_page *root; 957a6a0b05dSBen Gardon int root_as_id; 958a6a0b05dSBen Gardon bool spte_set = false; 959a6a0b05dSBen Gardon 960a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) { 961a6a0b05dSBen Gardon root_as_id = kvm_mmu_page_as_id(root); 962a6a0b05dSBen Gardon if (root_as_id != slot->as_id) 963a6a0b05dSBen Gardon continue; 964a6a0b05dSBen Gardon 965a6a0b05dSBen Gardon spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, 966a6a0b05dSBen Gardon slot->base_gfn + slot->npages); 967a6a0b05dSBen Gardon } 968a6a0b05dSBen Gardon 969a6a0b05dSBen Gardon return spte_set; 970a6a0b05dSBen Gardon } 971a6a0b05dSBen Gardon 972a6a0b05dSBen Gardon /* 973a6a0b05dSBen Gardon * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is 974a6a0b05dSBen Gardon * set in mask, starting at gfn. The given memslot is expected to contain all 975a6a0b05dSBen Gardon * the GFNs represented by set bits in the mask. If AD bits are enabled, 976a6a0b05dSBen Gardon * clearing the dirty status will involve clearing the dirty bit on each SPTE 977a6a0b05dSBen Gardon * or, if AD bits are not enabled, clearing the writable bit on each SPTE. 978a6a0b05dSBen Gardon */ 979a6a0b05dSBen Gardon static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, 980a6a0b05dSBen Gardon gfn_t gfn, unsigned long mask, bool wrprot) 981a6a0b05dSBen Gardon { 982a6a0b05dSBen Gardon struct tdp_iter iter; 983a6a0b05dSBen Gardon u64 new_spte; 984a6a0b05dSBen Gardon 985a6a0b05dSBen Gardon tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask), 986a6a0b05dSBen Gardon gfn + BITS_PER_LONG) { 987a6a0b05dSBen Gardon if (!mask) 988a6a0b05dSBen Gardon break; 989a6a0b05dSBen Gardon 990a6a0b05dSBen Gardon if (iter.level > PG_LEVEL_4K || 991a6a0b05dSBen Gardon !(mask & (1UL << (iter.gfn - gfn)))) 992a6a0b05dSBen Gardon continue; 993a6a0b05dSBen Gardon 994a6a0b05dSBen Gardon if (wrprot || spte_ad_need_write_protect(iter.old_spte)) { 995a6a0b05dSBen Gardon if (is_writable_pte(iter.old_spte)) 996a6a0b05dSBen Gardon new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 997a6a0b05dSBen Gardon else 998a6a0b05dSBen Gardon continue; 999a6a0b05dSBen Gardon } else { 1000a6a0b05dSBen Gardon if (iter.old_spte & shadow_dirty_mask) 1001a6a0b05dSBen Gardon new_spte = iter.old_spte & ~shadow_dirty_mask; 1002a6a0b05dSBen Gardon else 1003a6a0b05dSBen Gardon continue; 1004a6a0b05dSBen Gardon } 1005a6a0b05dSBen Gardon 1006a6a0b05dSBen Gardon tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 1007a6a0b05dSBen Gardon 1008a6a0b05dSBen Gardon mask &= ~(1UL << (iter.gfn - gfn)); 1009a6a0b05dSBen Gardon } 1010a6a0b05dSBen Gardon } 1011a6a0b05dSBen Gardon 1012a6a0b05dSBen Gardon /* 1013a6a0b05dSBen Gardon * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is 1014a6a0b05dSBen Gardon * set in mask, starting at gfn. The given memslot is expected to contain all 1015a6a0b05dSBen Gardon * the GFNs represented by set bits in the mask. If AD bits are enabled, 1016a6a0b05dSBen Gardon * clearing the dirty status will involve clearing the dirty bit on each SPTE 1017a6a0b05dSBen Gardon * or, if AD bits are not enabled, clearing the writable bit on each SPTE. 1018a6a0b05dSBen Gardon */ 1019a6a0b05dSBen Gardon void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, 1020a6a0b05dSBen Gardon struct kvm_memory_slot *slot, 1021a6a0b05dSBen Gardon gfn_t gfn, unsigned long mask, 1022a6a0b05dSBen Gardon bool wrprot) 1023a6a0b05dSBen Gardon { 1024a6a0b05dSBen Gardon struct kvm_mmu_page *root; 1025a6a0b05dSBen Gardon int root_as_id; 1026a6a0b05dSBen Gardon 1027a6a0b05dSBen Gardon lockdep_assert_held(&kvm->mmu_lock); 1028a6a0b05dSBen Gardon for_each_tdp_mmu_root(kvm, root) { 1029a6a0b05dSBen Gardon root_as_id = kvm_mmu_page_as_id(root); 1030a6a0b05dSBen Gardon if (root_as_id != slot->as_id) 1031a6a0b05dSBen Gardon continue; 1032a6a0b05dSBen Gardon 1033a6a0b05dSBen Gardon clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot); 1034a6a0b05dSBen Gardon } 1035a6a0b05dSBen Gardon } 1036a6a0b05dSBen Gardon 1037a6a0b05dSBen Gardon /* 1038a6a0b05dSBen Gardon * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is 1039a6a0b05dSBen Gardon * only used for PML, and so will involve setting the dirty bit on each SPTE. 1040a6a0b05dSBen Gardon * Returns true if an SPTE has been changed and the TLBs need to be flushed. 1041a6a0b05dSBen Gardon */ 1042a6a0b05dSBen Gardon static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 1043a6a0b05dSBen Gardon gfn_t start, gfn_t end) 1044a6a0b05dSBen Gardon { 1045a6a0b05dSBen Gardon struct tdp_iter iter; 1046a6a0b05dSBen Gardon u64 new_spte; 1047a6a0b05dSBen Gardon bool spte_set = false; 1048a6a0b05dSBen Gardon 1049a6a0b05dSBen Gardon tdp_root_for_each_pte(iter, root, start, end) { 1050a6a0b05dSBen Gardon if (!is_shadow_present_pte(iter.old_spte)) 1051a6a0b05dSBen Gardon continue; 1052a6a0b05dSBen Gardon 1053a6a0b05dSBen Gardon new_spte = iter.old_spte | shadow_dirty_mask; 1054a6a0b05dSBen Gardon 1055a6a0b05dSBen Gardon tdp_mmu_set_spte(kvm, &iter, new_spte); 1056a6a0b05dSBen Gardon spte_set = true; 1057a6a0b05dSBen Gardon 1058a6a0b05dSBen Gardon tdp_mmu_iter_cond_resched(kvm, &iter); 1059a6a0b05dSBen Gardon } 1060a6a0b05dSBen Gardon 1061a6a0b05dSBen Gardon return spte_set; 1062a6a0b05dSBen Gardon } 1063a6a0b05dSBen Gardon 1064a6a0b05dSBen Gardon /* 1065a6a0b05dSBen Gardon * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is 1066a6a0b05dSBen Gardon * only used for PML, and so will involve setting the dirty bit on each SPTE. 1067a6a0b05dSBen Gardon * Returns true if an SPTE has been changed and the TLBs need to be flushed. 1068a6a0b05dSBen Gardon */ 1069a6a0b05dSBen Gardon bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) 1070a6a0b05dSBen Gardon { 1071a6a0b05dSBen Gardon struct kvm_mmu_page *root; 1072a6a0b05dSBen Gardon int root_as_id; 1073a6a0b05dSBen Gardon bool spte_set = false; 1074a6a0b05dSBen Gardon 1075a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) { 1076a6a0b05dSBen Gardon root_as_id = kvm_mmu_page_as_id(root); 1077a6a0b05dSBen Gardon if (root_as_id != slot->as_id) 1078a6a0b05dSBen Gardon continue; 1079a6a0b05dSBen Gardon 1080a6a0b05dSBen Gardon spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, 1081a6a0b05dSBen Gardon slot->base_gfn + slot->npages); 1082a6a0b05dSBen Gardon } 1083a6a0b05dSBen Gardon return spte_set; 1084a6a0b05dSBen Gardon } 1085a6a0b05dSBen Gardon 108614881998SBen Gardon /* 108787aa9ec9SBen Gardon * Clear leaf entries which could be replaced by large mappings, for 108887aa9ec9SBen Gardon * GFNs within the slot. 108914881998SBen Gardon */ 109014881998SBen Gardon static void zap_collapsible_spte_range(struct kvm *kvm, 109114881998SBen Gardon struct kvm_mmu_page *root, 109214881998SBen Gardon gfn_t start, gfn_t end) 109314881998SBen Gardon { 109414881998SBen Gardon struct tdp_iter iter; 109514881998SBen Gardon kvm_pfn_t pfn; 109614881998SBen Gardon bool spte_set = false; 109714881998SBen Gardon 109814881998SBen Gardon tdp_root_for_each_pte(iter, root, start, end) { 109914881998SBen Gardon if (!is_shadow_present_pte(iter.old_spte) || 110087aa9ec9SBen Gardon !is_last_spte(iter.old_spte, iter.level)) 110114881998SBen Gardon continue; 110214881998SBen Gardon 110314881998SBen Gardon pfn = spte_to_pfn(iter.old_spte); 110414881998SBen Gardon if (kvm_is_reserved_pfn(pfn) || 110514881998SBen Gardon !PageTransCompoundMap(pfn_to_page(pfn))) 110614881998SBen Gardon continue; 110714881998SBen Gardon 110814881998SBen Gardon tdp_mmu_set_spte(kvm, &iter, 0); 110914881998SBen Gardon 1110e28a436cSBen Gardon spte_set = !tdp_mmu_iter_flush_cond_resched(kvm, &iter); 111114881998SBen Gardon } 111214881998SBen Gardon 111314881998SBen Gardon if (spte_set) 111414881998SBen Gardon kvm_flush_remote_tlbs(kvm); 111514881998SBen Gardon } 111614881998SBen Gardon 111714881998SBen Gardon /* 111814881998SBen Gardon * Clear non-leaf entries (and free associated page tables) which could 111914881998SBen Gardon * be replaced by large mappings, for GFNs within the slot. 112014881998SBen Gardon */ 112114881998SBen Gardon void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, 112214881998SBen Gardon const struct kvm_memory_slot *slot) 112314881998SBen Gardon { 112414881998SBen Gardon struct kvm_mmu_page *root; 112514881998SBen Gardon int root_as_id; 112614881998SBen Gardon 1127a889ea54SBen Gardon for_each_tdp_mmu_root_yield_safe(kvm, root) { 112814881998SBen Gardon root_as_id = kvm_mmu_page_as_id(root); 112914881998SBen Gardon if (root_as_id != slot->as_id) 113014881998SBen Gardon continue; 113114881998SBen Gardon 113214881998SBen Gardon zap_collapsible_spte_range(kvm, root, slot->base_gfn, 113314881998SBen Gardon slot->base_gfn + slot->npages); 113414881998SBen Gardon } 113514881998SBen Gardon } 113646044f72SBen Gardon 113746044f72SBen Gardon /* 113846044f72SBen Gardon * Removes write access on the last level SPTE mapping this GFN and unsets the 113946044f72SBen Gardon * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted. 114046044f72SBen Gardon * Returns true if an SPTE was set and a TLB flush is needed. 114146044f72SBen Gardon */ 114246044f72SBen Gardon static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, 114346044f72SBen Gardon gfn_t gfn) 114446044f72SBen Gardon { 114546044f72SBen Gardon struct tdp_iter iter; 114646044f72SBen Gardon u64 new_spte; 114746044f72SBen Gardon bool spte_set = false; 114846044f72SBen Gardon 114946044f72SBen Gardon tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) { 115046044f72SBen Gardon if (!is_writable_pte(iter.old_spte)) 115146044f72SBen Gardon break; 115246044f72SBen Gardon 115346044f72SBen Gardon new_spte = iter.old_spte & 115446044f72SBen Gardon ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); 115546044f72SBen Gardon 115646044f72SBen Gardon tdp_mmu_set_spte(kvm, &iter, new_spte); 115746044f72SBen Gardon spte_set = true; 115846044f72SBen Gardon } 115946044f72SBen Gardon 116046044f72SBen Gardon return spte_set; 116146044f72SBen Gardon } 116246044f72SBen Gardon 116346044f72SBen Gardon /* 116446044f72SBen Gardon * Removes write access on the last level SPTE mapping this GFN and unsets the 116546044f72SBen Gardon * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted. 116646044f72SBen Gardon * Returns true if an SPTE was set and a TLB flush is needed. 116746044f72SBen Gardon */ 116846044f72SBen Gardon bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, 116946044f72SBen Gardon struct kvm_memory_slot *slot, gfn_t gfn) 117046044f72SBen Gardon { 117146044f72SBen Gardon struct kvm_mmu_page *root; 117246044f72SBen Gardon int root_as_id; 117346044f72SBen Gardon bool spte_set = false; 117446044f72SBen Gardon 117546044f72SBen Gardon lockdep_assert_held(&kvm->mmu_lock); 117646044f72SBen Gardon for_each_tdp_mmu_root(kvm, root) { 117746044f72SBen Gardon root_as_id = kvm_mmu_page_as_id(root); 117846044f72SBen Gardon if (root_as_id != slot->as_id) 117946044f72SBen Gardon continue; 118046044f72SBen Gardon 118146044f72SBen Gardon spte_set |= write_protect_gfn(kvm, root, gfn); 118246044f72SBen Gardon } 118346044f72SBen Gardon return spte_set; 118446044f72SBen Gardon } 118546044f72SBen Gardon 118695fb5b02SBen Gardon /* 118795fb5b02SBen Gardon * Return the level of the lowest level SPTE added to sptes. 118895fb5b02SBen Gardon * That SPTE may be non-present. 118995fb5b02SBen Gardon */ 119039b4d43eSSean Christopherson int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, 119139b4d43eSSean Christopherson int *root_level) 119295fb5b02SBen Gardon { 119395fb5b02SBen Gardon struct tdp_iter iter; 119495fb5b02SBen Gardon struct kvm_mmu *mmu = vcpu->arch.mmu; 119595fb5b02SBen Gardon gfn_t gfn = addr >> PAGE_SHIFT; 11962aa07893SSean Christopherson int leaf = -1; 119795fb5b02SBen Gardon 119839b4d43eSSean Christopherson *root_level = vcpu->arch.mmu->shadow_root_level; 119995fb5b02SBen Gardon 120095fb5b02SBen Gardon tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { 120195fb5b02SBen Gardon leaf = iter.level; 1202dde81f94SSean Christopherson sptes[leaf] = iter.old_spte; 120395fb5b02SBen Gardon } 120495fb5b02SBen Gardon 120595fb5b02SBen Gardon return leaf; 120695fb5b02SBen Gardon } 1207