// SPDX-License-Identifier: GPL-2.0

#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"

#ifdef CONFIG_X86_64
static bool __read_mostly tdp_mmu_enabled = false;
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
#endif

static bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
	return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
#else
	return false;
#endif /* CONFIG_X86_64 */
}

/* Initializes the TDP MMU for the VM, if enabled. */
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
	if (!is_tdp_mmu_enabled())
		return;

	/* This should not be changed for the lifetime of the VM. */
	kvm->arch.tdp_mmu_enabled = true;

	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
	if (!kvm->arch.tdp_mmu_enabled)
		return;

	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}

#define for_each_tdp_mmu_root(_kvm, _root)				\
	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
	struct kvm_mmu_page *sp;

	sp = to_shadow_page(hpa);

	return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield);

void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);

	lockdep_assert_held(&kvm->mmu_lock);

	WARN_ON(root->root_count);
	WARN_ON(!root->tdp_mmu_page);

	list_del(&root->link);

	zap_gfn_range(kvm, root, 0, max_gfn, false);

	free_page((unsigned long)root->spt);
	kmem_cache_free(mmu_page_header_cache, root);
}

static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
						   int level)
{
	union kvm_mmu_page_role role;

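	/*
	 * TDP MMU pages are always direct-mapped, full-access pages with
	 * 8-byte PTEs; only the level varies with the caller.
	 */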
	role = vcpu->arch.mmu->mmu_role.base;
	role.level = level;
	role.direct = true;
	role.gpte_is_8_bytes = true;
	role.access = ACC_ALL;

	return role;
}

static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
					       int level)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	sp->role.word = page_role_for_level(vcpu, level).word;
	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	return sp;
}

static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_page_role role;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *root;

	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

	spin_lock(&kvm->mmu_lock);

	/* Check for an existing root before allocating a new one. */
	for_each_tdp_mmu_root(kvm, root) {
		if (root->role.word == role.word) {
			kvm_mmu_get_root(kvm, root);
			spin_unlock(&kvm->mmu_lock);
			return root;
		}
	}

	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
	root->root_count = 1;

	list_add(&root->link, &kvm->arch.tdp_mmu_roots);

	spin_unlock(&kvm->mmu_lock);

	return root;
}

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *root;

	root = get_tdp_mmu_vcpu_root(vcpu);
	if (!root)
		return INVALID_PAGE;

	return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level);

static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return sp->role.smm ? 1 : 0;
}
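
/*
 * If the Accessed state of a leaf SPTE is being lost (cleared, or the SPTE
 * now points at a different PFN), forward it to the underlying page via
 * kvm_set_pfn_accessed().
 */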
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
		return;

	if (is_accessed_spte(old_spte) &&
	    (!is_accessed_spte(new_spte) || pfn_changed))
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
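
/*
 * If a 4K leaf SPTE gains write access, mark the GFN dirty in its memslot
 * so that the change is captured in the dirty log.
 */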
static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
					  u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed;
	struct kvm_memory_slot *slot;

	if (level > PG_LEVEL_4K)
		return;

	pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if ((!is_writable_pte(old_spte) || pfn_changed) &&
	    is_writable_pte(new_spte)) {
		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
		mark_page_dirty_in_slot(slot, gfn);
	}
}

/**
 * handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of an SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				  u64 old_spte, u64 new_spte, int level)
{
	bool was_present = is_shadow_present_pte(old_spte);
	bool is_present = is_shadow_present_pte(new_spte);
	bool was_leaf = was_present && is_last_spte(old_spte, level);
	bool is_leaf = is_present && is_last_spte(new_spte, level);
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
	u64 *pt;
	struct kvm_mmu_page *sp;
	u64 old_child_spte;
	int i;

	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
	WARN_ON(level < PG_LEVEL_4K);
	WARN_ON(gfn % KVM_PAGES_PER_HPAGE(level));

	/*
	 * If this warning were to trigger it would indicate that there was a
	 * missing MMU notifier or a race with some notifier handler.
	 * A present, leaf SPTE should never be directly replaced with another
	 * present leaf SPTE pointing to a different PFN. A notifier handler
	 * should be zapping the SPTE before the main MM's page table is
	 * changed, or the SPTE should be zeroed, and the TLBs flushed by the
	 * thread before replacement.
	 */
	if (was_leaf && is_leaf && pfn_changed) {
		pr_err("Invalid SPTE change: cannot replace a present leaf\n"
		       "SPTE with another present leaf SPTE mapping a\n"
		       "different PFN!\n"
		       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
		       as_id, gfn, old_spte, new_spte, level);

		/*
		 * Crash the host to prevent error propagation and guest data
		 * corruption.
		 */
		BUG();
	}

	if (old_spte == new_spte)
		return;

	/*
	 * The only time an SPTE should be changed from one non-present state
	 * to another is when an MMIO entry is installed/modified/removed.
	 * In that case, there is nothing to do here.
	 */
	if (!was_present && !is_present) {
		/*
		 * If this change does not involve an MMIO SPTE, it is
		 * unexpected. Log the change, though it should not impact the
		 * guest since both the former and current SPTEs are nonpresent.
		 */
		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
			       "should not be replaced with another,\n"
			       "different nonpresent SPTE, unless one or both\n"
			       "are MMIO SPTEs.\n"
			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
			       as_id, gfn, old_spte, new_spte, level);
		return;
	}

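	/*
	 * If the dirty state of a leaf SPTE is being lost (or the PFN is
	 * changing), hand the dirty state off to the underlying page.
	 */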
	if (was_leaf && is_dirty_spte(old_spte) &&
	    (!is_dirty_spte(new_spte) || pfn_changed))
		kvm_set_pfn_dirty(spte_to_pfn(old_spte));

	/*
	 * Recursively handle child PTs if the change removed a subtree from
	 * the paging structure.
	 */
	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
		pt = spte_to_child_pt(old_spte, level);
		sp = sptep_to_sp(pt);

		list_del(&sp->link);

		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
			old_child_spte = READ_ONCE(*(pt + i));
			WRITE_ONCE(*(pt + i), 0);
			handle_changed_spte(kvm, as_id,
				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
				old_child_spte, 0, level - 1);
		}

		kvm_flush_remote_tlbs_with_address(kvm, gfn,
						   KVM_PAGES_PER_HPAGE(level));

		free_page((unsigned long)pt);
		kmem_cache_free(mmu_page_header_cache, sp);
	}
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
	handle_changed_spte_acc_track(old_spte, new_spte, level);
	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
				      new_spte, level);
}

static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				      u64 new_spte, bool record_acc_track,
				      bool record_dirty_log)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	WRITE_ONCE(*iter->sptep, new_spte);

	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			      iter->level);
	if (record_acc_track)
		handle_changed_spte_acc_track(iter->old_spte, new_spte,
					      iter->level);
	if (record_dirty_log)
		handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
					      iter->old_spte, new_spte,
					      iter->level);
}
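
/*
 * Convenience wrappers around __tdp_mmu_set_spte(). The _no_acc_track and
 * _no_dirty_log variants are for callers, such as the aging and dirty
 * logging paths, that are deliberately manipulating those bits and so must
 * skip the corresponding bookkeeping.
 */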
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
}

static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
}

static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
}

#define tdp_root_for_each_pte(_iter, _root, _start, _end)		\
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)		\
	tdp_root_for_each_pte(_iter, _root, _start, _end)		\
		if (!is_shadow_present_pte(_iter.old_spte) ||		\
		    !is_last_spte(_iter.old_spte, _iter.level))		\
			continue;					\
		else

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)			\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),			\
			 _mmu->shadow_root_level, _start, _end)

/*
 * Flush the TLB and yield if the MMU lock is contended or this thread needs
 * to return control to the scheduler. Returns whether the caller still needs
 * to flush the TLB: false if the flush was performed here, true otherwise.
 */
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		kvm_flush_remote_tlbs(kvm);
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
		return false;
	} else {
		return true;
	}
}
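
/*
 * Yield the MMU lock and reschedule if needed. Unlike
 * tdp_mmu_iter_flush_cond_resched(), no TLB flush is performed before
 * yielding.
 */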
static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
	}
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 * If can_yield is true, will release the MMU lock and reschedule if the
 * scheduler needs the CPU or there is contention on the MMU lock. If this
 * function cannot yield, it will not release the MMU lock or reschedule and
 * the caller must ensure it does not supply too large a GFN range, or the
 * operation can cause a soft lockup.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield)
{
	struct tdp_iter iter;
	bool flush_needed = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/*
		 * If this is a non-last-level SPTE that covers a larger range
		 * than should be zapped, continue, and zap the mappings at a
		 * lower level.
		 */
		if ((iter.gfn < start ||
		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		if (can_yield)
			flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
		else
			flush_needed = true;
	}
	return flush_needed;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
{
	struct kvm_mmu_page *root;
	bool flush = false;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		flush |= zap_gfn_range(kvm, root, start, end, true);

		kvm_mmu_put_root(kvm, root);
	}

	return flush;
}

void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
	bool flush;

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
					   int map_writable,
					   struct tdp_iter *iter,
					   kvm_pfn_t pfn, bool prefault)
{
	u64 new_spte;
	int ret = 0;
	int make_spte_ret = 0;

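	/*
	 * A fault on a GPA with no backing memslot is handled by installing
	 * an MMIO SPTE; accesses through it are then emulated (see the
	 * RET_PF_EMULATE below).
	 */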
	if (unlikely(is_noslot_pfn(pfn))) {
		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
	} else
		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
					  pfn, iter->old_spte, prefault, true,
					  map_writable, !shadow_accessed_mask,
					  &new_spte);

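	/*
	 * If the computed SPTE matches the value already present, the fault
	 * is spurious and there is nothing to update.
	 */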
	if (new_spte == iter->old_spte)
		ret = RET_PF_SPURIOUS;
	else
		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

	/*
	 * If the page fault was caused by a write but the page is write
	 * protected, emulation is needed. If the emulation was skipped,
	 * the vCPU would have the same fault again.
	 */
	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
		if (write)
			ret = RET_PF_EMULATE;
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}

	/* If an MMIO SPTE is installed, the MMIO will need to be emulated. */
	if (unlikely(is_mmio_spte(new_spte)))
		ret = RET_PF_EMULATE;

	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	if (!prefault)
		vcpu->stat.pf_fixed++;

	return ret;
}

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
 */
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
		    int map_writable, int max_level, kvm_pfn_t pfn,
		    bool prefault)
{
	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	bool write = error_code & PFERR_WRITE_MASK;
	bool exec = error_code & PFERR_FETCH_MASK;
	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	struct tdp_iter iter;
	struct kvm_mmu_page *sp;
	u64 *child_pt;
	u64 new_spte;
	int ret;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int level;
	int req_level;

	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;
	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;

	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
					huge_page_disallowed, &req_level);

	trace_kvm_mmu_spte_requested(gpa, level, pfn);
	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		if (nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(iter.old_spte, gfn,
						   iter.level, &pfn, &level);

		if (iter.level == level)
			break;

		/*
		 * If there is an SPTE mapping a large page at a higher level
		 * than the target, that SPTE must be cleared and replaced
		 * with a non-leaf SPTE.
		 */
		if (is_shadow_present_pte(iter.old_spte) &&
		    is_large_pte(iter.old_spte)) {
			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);

			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
					KVM_PAGES_PER_HPAGE(iter.level));

			/*
			 * The iter must explicitly re-read the spte here
			 * because the new value informs the !present
			 * path below.
			 */
			iter.old_spte = READ_ONCE(*iter.sptep);
		}

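		/*
		 * If the SPTE is not present, allocate a new child page table
		 * and link it into the paging structure.
		 */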
		if (!is_shadow_present_pte(iter.old_spte)) {
			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
			child_pt = sp->spt;
			clear_page(child_pt);
			new_spte = make_nonleaf_spte(child_pt,
						     !shadow_accessed_mask);

			trace_kvm_mmu_get_page(sp, true);
			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
		}
	}

	if (WARN_ON(iter.level != level))
		return RET_PF_RETRY;

	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
					      pfn, prefault);

	return ret;
}
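
/*
 * Call the given handler on each root, for each memslot GFN range that
 * intersects the HVA range [start, end), ORing together the handlers'
 * return values.
 */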
static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
		unsigned long end, unsigned long data,
		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
			       struct kvm_mmu_page *root, gfn_t start,
			       gfn_t end, unsigned long data))
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	struct kvm_mmu_page *root;
	int ret = 0;
	int as_id;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		as_id = kvm_mmu_page_as_id(root);
		slots = __kvm_memslots(kvm, as_id);
		kvm_for_each_memslot(memslot, slots) {
			unsigned long hva_start, hva_end;
			gfn_t gfn_start, gfn_end;

			hva_start = max(start, memslot->userspace_addr);
			hva_end = min(end, memslot->userspace_addr +
				      (memslot->npages << PAGE_SHIFT));
			if (hva_start >= hva_end)
				continue;
			/*
			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
			 */
			gfn_start = hva_to_gfn_memslot(hva_start, memslot);
			gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

			ret |= handler(kvm, memslot, root, gfn_start,
				       gfn_end, data);
		}

		kvm_mmu_put_root(kvm, root);
	}

	return ret;
}

static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
				     struct kvm_memory_slot *slot,
				     struct kvm_mmu_page *root, gfn_t start,
				     gfn_t end, unsigned long unused)
{
	return zap_gfn_range(kvm, root, start, end, false);
}

int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    zap_gfn_range_hva_wrapper);
}

/*
 * Mark the SPTEs in the range of GFNs [start, end) unaccessed and return
 * non-zero if any of the GFNs in the range have been accessed.
 */
static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
			 struct kvm_mmu_page *root, gfn_t start, gfn_t end,
			 unsigned long unused)
{
	struct tdp_iter iter;
	int young = 0;
	u64 new_spte = 0;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		/*
		 * If we have a non-accessed entry we don't need to change the
		 * pte.
		 */
		if (!is_accessed_spte(iter.old_spte))
			continue;

		new_spte = iter.old_spte;

		if (spte_ad_enabled(new_spte)) {
			clear_bit((ffs(shadow_accessed_mask) - 1),
				  (unsigned long *)&new_spte);
		} else {
			/*
			 * Capture the dirty status of the page, so that it
			 * doesn't get lost when the SPTE is marked for access
			 * tracking.
			 */
			if (is_writable_pte(new_spte))
				kvm_set_pfn_dirty(spte_to_pfn(new_spte));

			new_spte = mark_spte_for_access_track(new_spte);
		}
		new_spte &= ~shadow_dirty_mask;

		tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
		young = 1;
	}

	return young;
}

int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    age_gfn_range);
}
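
/*
 * Test whether the GFN is mapped by an accessed (young) SPTE, without
 * clearing the accessed state.
 */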
static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long unused2)
{
	struct tdp_iter iter;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1)
		if (is_accessed_spte(iter.old_spte))
			return 1;

	return 0;
}

int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
					    test_age_gfn);
}

/*
 * Handle the changed_pte MMU notifier for the TDP MMU.
 * data is a pointer to the new pte_t mapping the HVA specified by the MMU
 * notifier.
 * Returns non-zero if a flush is needed before releasing the MMU lock.
 */
static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long data)
{
	struct tdp_iter iter;
	pte_t *ptep = (pte_t *)data;
	kvm_pfn_t new_pfn;
	u64 new_spte;
	int need_flush = 0;

	WARN_ON(pte_huge(*ptep));

	new_pfn = pte_pfn(*ptep);

	tdp_root_for_each_pte(iter, root, gfn, gfn + 1) {
		if (iter.level != PG_LEVEL_4K)
			continue;

		if (!is_shadow_present_pte(iter.old_spte))
			break;

		tdp_mmu_set_spte(kvm, &iter, 0);

		kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1);

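		/*
		 * If the new host PTE is read-only, install a read-only SPTE
		 * for the new PFN immediately; otherwise leave the GFN
		 * unmapped, to be rebuilt by a future fault.
		 */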
		if (!pte_write(*ptep)) {
			new_spte = kvm_mmu_changed_pte_notifier_make_spte(
					iter.old_spte, new_pfn);

			tdp_mmu_set_spte(kvm, &iter, new_spte);
		}

		need_flush = 1;
	}

	if (need_flush)
		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);

	return 0;
}

int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
			     pte_t *host_ptep)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1,
					    (unsigned long)host_ptep,
					    set_tdp_spte);
}

/*
 * Remove write access from all the SPTEs mapping GFNs [start, end). Only
 * leaf SPTEs at or above min_level are write-protected.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			     gfn_t start, gfn_t end, int min_level)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);

	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
				   min_level, start, end) {
		if (!is_shadow_present_pte(iter.old_spte) ||
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}
	return spte_set;
}

/*
 * Remove write access from all the SPTEs mapping GFNs in the memslot. Will
 * only affect leaf SPTEs down to min_level.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
			     int min_level)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
			     slot->base_gfn + slot->npages, min_level);

		kvm_mmu_put_root(kvm, root);
	}

	return spte_set;
}

/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
 */
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
				  gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		if (spte_ad_need_write_protect(iter.old_spte)) {
			if (is_writable_pte(iter.old_spte))
				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
			else
				continue;
		} else {
			if (iter.old_spte & shadow_dirty_mask)
				new_spte = iter.old_spte & ~shadow_dirty_mask;
			else
				continue;
		}

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}
	return spte_set;
}

/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
 */
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
				slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}

	return spte_set;
}

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
				  gfn_t gfn, unsigned long mask, bool wrprot)
{
	struct tdp_iter iter;
	u64 new_spte;

	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
				   gfn + BITS_PER_LONG) {
		if (!mask)
			break;

		if (iter.level > PG_LEVEL_4K ||
		    !(mask & (1UL << (iter.gfn - gfn))))
			continue;

		if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
			if (is_writable_pte(iter.old_spte))
				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
			else
				continue;
		} else {
			if (iter.old_spte & shadow_dirty_mask)
				new_spte = iter.old_spte & ~shadow_dirty_mask;
			else
				continue;
		}

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);

		mask &= ~(1UL << (iter.gfn - gfn));
	}
}

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 */
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
				       struct kvm_memory_slot *slot,
				       gfn_t gfn, unsigned long mask,
				       bool wrprot)
{
	struct kvm_mmu_page *root;
	int root_as_id;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
	}
}

/*
 * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
 * only used for PML, and so will involve setting the dirty bit on each SPTE.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
				gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		new_spte = iter.old_spte | shadow_dirty_mask;

		tdp_mmu_set_spte(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}

	return spte_set;
}

/*
 * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
 * only used for PML, and so will involve setting the dirty bit on each SPTE.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
				slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}
	return spte_set;
}

/*
 * Clear non-leaf entries (and free associated page tables) which could
 * be replaced by large mappings, for GFNs within the slot.
 */
static void zap_collapsible_spte_range(struct kvm *kvm,
				       struct kvm_mmu_page *root,
				       gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	kvm_pfn_t pfn;
	bool spte_set = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte) ||
		    is_last_spte(iter.old_spte, iter.level))
			continue;

		pfn = spte_to_pfn(iter.old_spte);
		if (kvm_is_reserved_pfn(pfn) ||
		    !PageTransCompoundMap(pfn_to_page(pfn)))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
	}

	if (spte_set)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Clear non-leaf entries (and free associated page tables) which could
 * be replaced by large mappings, for GFNs within the slot.
 */
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
				       const struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
					   slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}
}

/*
 * Removes write access on the last level SPTE mapping this GFN and unsets the
 * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
 * Returns true if an SPTE was set and a TLB flush is needed.
 */
static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
			      gfn_t gfn)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
		if (!is_writable_pte(iter.old_spte))
			break;

		new_spte = iter.old_spte &
			~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);

		tdp_mmu_set_spte(kvm, &iter, new_spte);
		spte_set = true;
	}

	return spte_set;
}

/*
 * Removes write access on the last level SPTE mapping this GFN and unsets the
 * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
 * Returns true if an SPTE was set and a TLB flush is needed.
 */
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
				   struct kvm_memory_slot *slot, gfn_t gfn)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		spte_set |= write_protect_gfn(kvm, root, gfn);
	}
	return spte_set;
}

/*
 * Return the level of the lowest level SPTE added to sptes.
 * That SPTE may be non-present.
 */
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
{
	struct tdp_iter iter;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	int leaf = vcpu->arch.mmu->shadow_root_level;
	gfn_t gfn = addr >> PAGE_SHIFT;

	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		leaf = iter.level;
		sptes[leaf - 1] = iter.old_spte;
	}

	return leaf;
}