// SPDX-License-Identifier: GPL-2.0

#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"

#include <trace/events/kvm.h>

#ifdef CONFIG_X86_64
static bool __read_mostly tdp_mmu_enabled = false;
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
#endif

static bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
	return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
#else
	return false;
#endif /* CONFIG_X86_64 */
}

/* Initializes the TDP MMU for the VM, if enabled. */
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
	if (!is_tdp_mmu_enabled())
		return;

	/* This should not be changed for the lifetime of the VM. */
	kvm->arch.tdp_mmu_enabled = true;

	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
	if (!kvm->arch.tdp_mmu_enabled)
		return;

	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}

#define for_each_tdp_mmu_root(_kvm, _root)			    \
	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
	struct kvm_mmu_page *sp;

	if (!kvm->arch.tdp_mmu_enabled)
		return false;
	if (WARN_ON(!VALID_PAGE(hpa)))
		return false;

	sp = to_shadow_page(hpa);
	if (WARN_ON(!sp))
		return false;

	return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield);

void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);

	lockdep_assert_held(&kvm->mmu_lock);

	WARN_ON(root->root_count);
	WARN_ON(!root->tdp_mmu_page);

	list_del(&root->link);

	zap_gfn_range(kvm, root, 0, max_gfn, false);

	free_page((unsigned long)root->spt);
	kmem_cache_free(mmu_page_header_cache, root);
}
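
/*
 * Compute the role for a TDP MMU page at the given level. TDP MMU pages
 * inherit the vCPU's base MMU role, are always direct mapped, and grant
 * full access; only the level varies across pages in a paging structure.
 */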
static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
						   int level)
{
	union kvm_mmu_page_role role;

	role = vcpu->arch.mmu->mmu_role.base;
	role.level = level;
	role.direct = true;
	role.gpte_is_8_bytes = true;
	role.access = ACC_ALL;

	return role;
}

static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
					       int level)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	sp->role.word = page_role_for_level(vcpu, level).word;
	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	trace_kvm_mmu_get_page(sp, true);

	return sp;
}
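
/*
 * Get a root page for the vCPU's current MMU role, either by taking a
 * reference on a matching root already in the VM's list or by allocating
 * a new one and adding it to the list.
 */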
static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_page_role role;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *root;

	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

	spin_lock(&kvm->mmu_lock);

	/* Check for an existing root before allocating a new one. */
	for_each_tdp_mmu_root(kvm, root) {
		if (root->role.word == role.word) {
			kvm_mmu_get_root(kvm, root);
			spin_unlock(&kvm->mmu_lock);
			return root;
		}
	}

	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
	root->root_count = 1;

	list_add(&root->link, &kvm->arch.tdp_mmu_roots);

	spin_unlock(&kvm->mmu_lock);

	return root;
}

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *root;

	root = get_tdp_mmu_vcpu_root(vcpu);
	if (!root)
		return INVALID_PAGE;

	return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level);

static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return sp->role.smm ? 1 : 0;
}
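
/*
 * Propagate the accessed state of a present leaf SPTE to the backing page
 * when the accessed bit is cleared or the SPTE is changed to point at a
 * different PFN.
 */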
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
		return;

	if (is_accessed_spte(old_spte) &&
	    (!is_accessed_spte(new_spte) || pfn_changed))
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}

/* Mark the GFN dirty in its memslot when a 4K SPTE becomes writable. */
static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
					  u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed;
	struct kvm_memory_slot *slot;

	if (level > PG_LEVEL_4K)
		return;

	pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if ((!is_writable_pte(old_spte) || pfn_changed) &&
	    is_writable_pte(new_spte)) {
		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
		mark_page_dirty_in_slot(kvm, slot, gfn);
	}
}

/**
 * handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of a SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				  u64 old_spte, u64 new_spte, int level)
{
	bool was_present = is_shadow_present_pte(old_spte);
	bool is_present = is_shadow_present_pte(new_spte);
	bool was_leaf = was_present && is_last_spte(old_spte, level);
	bool is_leaf = is_present && is_last_spte(new_spte, level);
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
	u64 *pt;
	struct kvm_mmu_page *sp;
	u64 old_child_spte;
	int i;

	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
	WARN_ON(level < PG_LEVEL_4K);
	WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));

	/*
	 * If this warning were to trigger it would indicate that there was a
	 * missing MMU notifier or a race with some notifier handler.
	 * A present, leaf SPTE should never be directly replaced with another
	 * present leaf SPTE pointing to a different PFN. A notifier handler
	 * should be zapping the SPTE before the main MM's page table is
	 * changed, or the SPTE should be zeroed, and the TLBs flushed by the
	 * thread before replacement.
	 */
	if (was_leaf && is_leaf && pfn_changed) {
		pr_err("Invalid SPTE change: cannot replace a present leaf\n"
		       "SPTE with another present leaf SPTE mapping a\n"
		       "different PFN!\n"
		       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
		       as_id, gfn, old_spte, new_spte, level);

		/*
		 * Crash the host to prevent error propagation and guest data
		 * corruption.
		 */
		BUG();
	}

	if (old_spte == new_spte)
		return;

	/*
	 * The only times a SPTE should be changed from a non-present to
	 * non-present state is when an MMIO entry is installed/modified/
	 * removed. In that case, there is nothing to do here.
	 */
	if (!was_present && !is_present) {
		/*
		 * If this change does not involve a MMIO SPTE, it is
		 * unexpected. Log the change, though it should not impact the
		 * guest since both the former and current SPTEs are nonpresent.
		 */
		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
			       "should not be replaced with another,\n"
			       "different nonpresent SPTE, unless one or both\n"
			       "are MMIO SPTEs.\n"
			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
			       as_id, gfn, old_spte, new_spte, level);
		return;
	}

	if (was_leaf && is_dirty_spte(old_spte) &&
	    (!is_dirty_spte(new_spte) || pfn_changed))
		kvm_set_pfn_dirty(spte_to_pfn(old_spte));

	/*
	 * Recursively handle child PTs if the change removed a subtree from
	 * the paging structure.
	 */
	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
		pt = spte_to_child_pt(old_spte, level);
		sp = sptep_to_sp(pt);

		trace_kvm_mmu_prepare_zap_page(sp);

		list_del(&sp->link);

		if (sp->lpage_disallowed)
			unaccount_huge_nx_page(kvm, sp);

		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
			old_child_spte = READ_ONCE(*(pt + i));
			WRITE_ONCE(*(pt + i), 0);
			handle_changed_spte(kvm, as_id,
				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
				old_child_spte, 0, level - 1);
		}

		kvm_flush_remote_tlbs_with_address(kvm, gfn,
						   KVM_PAGES_PER_HPAGE(level));

		free_page((unsigned long)pt);
		kmem_cache_free(mmu_page_header_cache, sp);
	}
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
	handle_changed_spte_acc_track(old_spte, new_spte, level);
	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
				      new_spte, level);
}
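
/*
 * Write new_spte through the iterator and run the bookkeeping handlers.
 * record_acc_track and record_dirty_log let callers that handle accessed
 * tracking or dirty logging themselves suppress those parts of the
 * bookkeeping; the wrappers below cover the common combinations.
 */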
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				      u64 new_spte, bool record_acc_track,
				      bool record_dirty_log)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	WRITE_ONCE(*iter->sptep, new_spte);

	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			      iter->level);
	if (record_acc_track)
		handle_changed_spte_acc_track(iter->old_spte, new_spte,
					      iter->level);
	if (record_dirty_log)
		handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
					      iter->old_spte, new_spte,
					      iter->level);
}

static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
}

static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
}

static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
}

#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)	\
	tdp_root_for_each_pte(_iter, _root, _start, _end)	\
		if (!is_shadow_present_pte(_iter.old_spte) ||	\
		    !is_last_spte(_iter.old_spte, _iter.level))	\
			continue;				\
		else

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
			 _mmu->shadow_root_level, _start, _end)
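
/*
 * Typical use of the iterators above, modeled on the walks in this file:
 * visit the SPTEs mapping a GFN range under a root and route every
 * modification through tdp_mmu_set_spte() so that handle_changed_spte()
 * performs the associated bookkeeping, e.g. to zap a range:
 *
 *	tdp_root_for_each_pte(iter, root, start, end) {
 *		if (!is_shadow_present_pte(iter.old_spte))
 *			continue;
 *		tdp_mmu_set_spte(kvm, &iter, 0);
 *	}
 */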

/*
 * Flush the TLB if the process should drop kvm->mmu_lock.
 * Return whether the caller still needs to flush the tlb.
 */
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		kvm_flush_remote_tlbs(kvm);
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
		return false;
	} else {
		return true;
	}
}

/*
 * Release the MMU lock and reschedule if needed, then resume the walk.
 * Unlike tdp_mmu_iter_flush_cond_resched(), no TLB flush is performed
 * before yielding.
 */
static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
	}
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 * If can_yield is true, will release the MMU lock and reschedule if the
 * scheduler needs the CPU or there is contention on the MMU lock. If this
 * function cannot yield, it will not release the MMU lock or reschedule and
 * the caller must ensure it does not supply too large a GFN range, or the
 * operation can cause a soft lockup.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield)
{
	struct tdp_iter iter;
	bool flush_needed = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/*
		 * If this is a non-last-level SPTE that covers a larger range
		 * than should be zapped, continue, and zap the mappings at a
		 * lower level.
		 */
		if ((iter.gfn < start ||
		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		if (can_yield)
			flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
		else
			flush_needed = true;
	}
	return flush_needed;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
{
	struct kvm_mmu_page *root;
	bool flush = false;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		flush |= zap_gfn_range(kvm, root, start, end, true);

		kvm_mmu_put_root(kvm, root);
	}

	return flush;
}
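
/*
 * Zap all SPTEs in every TDP MMU root, covering the entire guest physical
 * address space, and flush the TLBs if anything was cleared.
 */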
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
	bool flush;

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
					   int map_writable,
					   struct tdp_iter *iter,
					   kvm_pfn_t pfn, bool prefault)
{
	u64 new_spte;
	int ret = 0;
	int make_spte_ret = 0;

	if (unlikely(is_noslot_pfn(pfn))) {
		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
	} else {
		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
					  pfn, iter->old_spte, prefault, true,
					  map_writable, !shadow_accessed_mask,
					  &new_spte);
		trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	}

	if (new_spte == iter->old_spte)
		ret = RET_PF_SPURIOUS;
	else
		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

	/*
	 * If the page fault was caused by a write but the page is write
	 * protected, emulation is needed. If the emulation was skipped,
	 * the vCPU would have the same fault again.
	 */
	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
		if (write)
			ret = RET_PF_EMULATE;
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}

	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
	if (unlikely(is_mmio_spte(new_spte)))
		ret = RET_PF_EMULATE;

	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	if (!prefault)
		vcpu->stat.pf_fixed++;

	return ret;
}

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
 */
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
		    int map_writable, int max_level, kvm_pfn_t pfn,
		    bool prefault)
{
	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	bool write = error_code & PFERR_WRITE_MASK;
	bool exec = error_code & PFERR_FETCH_MASK;
	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	struct tdp_iter iter;
	struct kvm_mmu_page *sp;
	u64 *child_pt;
	u64 new_spte;
	int ret;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int level;
	int req_level;

	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;
	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;

	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
					huge_page_disallowed, &req_level);

	trace_kvm_mmu_spte_requested(gpa, level, pfn);
	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		if (nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(iter.old_spte, gfn,
						   iter.level, &pfn, &level);

		if (iter.level == level)
			break;

		/*
		 * If there is an SPTE mapping a large page at a higher level
		 * than the target, that SPTE must be cleared and replaced
		 * with a non-leaf SPTE.
		 */
		if (is_shadow_present_pte(iter.old_spte) &&
		    is_large_pte(iter.old_spte)) {
			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);

			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
					KVM_PAGES_PER_HPAGE(iter.level));

			/*
			 * The iter must explicitly re-read the spte here
			 * because the new value informs the !present
			 * path below.
			 */
			iter.old_spte = READ_ONCE(*iter.sptep);
		}

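		/*
		 * If no SPTE is present at this level, install a new child
		 * page table and link it in with a non-leaf SPTE so the walk
		 * can continue down toward the target level.
		 */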
		if (!is_shadow_present_pte(iter.old_spte)) {
			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
			child_pt = sp->spt;
			clear_page(child_pt);
			new_spte = make_nonleaf_spte(child_pt,
						     !shadow_accessed_mask);

			trace_kvm_mmu_get_page(sp, true);
			if (huge_page_disallowed && req_level >= iter.level)
				account_huge_nx_page(vcpu->kvm, sp);

			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
		}
	}

	if (WARN_ON(iter.level != level))
		return RET_PF_RETRY;

	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
					      pfn, prefault);

	return ret;
}
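
/*
 * Call the given handler on each TDP MMU root, for every memslot GFN range
 * that intersects the HVA range [start, end). A reference is held on each
 * root across the handler invocation so the root cannot be freed if the
 * handler yields the MMU lock. Returns the OR of the handler return values.
 */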
static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
		unsigned long end, unsigned long data,
		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
			       struct kvm_mmu_page *root, gfn_t start,
			       gfn_t end, unsigned long data))
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	struct kvm_mmu_page *root;
	int ret = 0;
	int as_id;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		as_id = kvm_mmu_page_as_id(root);
		slots = __kvm_memslots(kvm, as_id);
		kvm_for_each_memslot(memslot, slots) {
			unsigned long hva_start, hva_end;
			gfn_t gfn_start, gfn_end;

			hva_start = max(start, memslot->userspace_addr);
			hva_end = min(end, memslot->userspace_addr +
				      (memslot->npages << PAGE_SHIFT));
			if (hva_start >= hva_end)
				continue;
			/*
			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
			 */
			gfn_start = hva_to_gfn_memslot(hva_start, memslot);
			gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

			ret |= handler(kvm, memslot, root, gfn_start,
				       gfn_end, data);
		}

		kvm_mmu_put_root(kvm, root);
	}

	return ret;
}

static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
				     struct kvm_memory_slot *slot,
				     struct kvm_mmu_page *root, gfn_t start,
				     gfn_t end, unsigned long unused)
{
	return zap_gfn_range(kvm, root, start, end, false);
}

int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    zap_gfn_range_hva_wrapper);
}

/*
 * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
 * if any of the GFNs in the range have been accessed.
 */
static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
			 struct kvm_mmu_page *root, gfn_t start, gfn_t end,
			 unsigned long unused)
{
	struct tdp_iter iter;
	int young = 0;
	u64 new_spte = 0;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		/*
		 * If we have a non-accessed entry we don't need to change the
		 * pte.
		 */
		if (!is_accessed_spte(iter.old_spte))
			continue;

		new_spte = iter.old_spte;

		if (spte_ad_enabled(new_spte)) {
			clear_bit((ffs(shadow_accessed_mask) - 1),
				  (unsigned long *)&new_spte);
		} else {
			/*
			 * Capture the dirty status of the page, so that it
			 * doesn't get lost when the SPTE is marked for access
			 * tracking.
			 */
			if (is_writable_pte(new_spte))
				kvm_set_pfn_dirty(spte_to_pfn(new_spte));

			new_spte = mark_spte_for_access_track(new_spte);
		}
		new_spte &= ~shadow_dirty_mask;

		tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
		young = 1;

		trace_kvm_age_page(iter.gfn, iter.level, slot, young);
	}

	return young;
}

int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    age_gfn_range);
}
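
/*
 * Return 1 if any last-level SPTE mapping the GFN has its accessed bit set,
 * without clearing the bit.
 */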
static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long unused2)
{
	struct tdp_iter iter;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1)
		if (is_accessed_spte(iter.old_spte))
			return 1;

	return 0;
}

int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
					    test_age_gfn);
}

/*
 * Handle the changed_pte MMU notifier for the TDP MMU.
 * data is a pointer to the new pte_t mapping the HVA specified by the MMU
 * notifier.
 * The handler flushes the TLB itself when a flush is required, so it always
 * returns 0.
 */
static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long data)
{
	struct tdp_iter iter;
	pte_t *ptep = (pte_t *)data;
	kvm_pfn_t new_pfn;
	u64 new_spte;
	int need_flush = 0;

	WARN_ON(pte_huge(*ptep));

	new_pfn = pte_pfn(*ptep);

	tdp_root_for_each_pte(iter, root, gfn, gfn + 1) {
		if (iter.level != PG_LEVEL_4K)
			continue;

		if (!is_shadow_present_pte(iter.old_spte))
			break;

		tdp_mmu_set_spte(kvm, &iter, 0);

		kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1);

		if (!pte_write(*ptep)) {
			new_spte = kvm_mmu_changed_pte_notifier_make_spte(
					iter.old_spte, new_pfn);

			tdp_mmu_set_spte(kvm, &iter, new_spte);
		}

		need_flush = 1;
	}

	if (need_flush)
		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);

	return 0;
}

int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
			     pte_t *host_ptep)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1,
					    (unsigned long)host_ptep,
					    set_tdp_spte);
}

/*
 * Remove write access from all the leaf SPTEs at or above min_level that
 * map GFNs [start, end).
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			     gfn_t start, gfn_t end, int min_level)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);

	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
				   min_level, start, end) {
		if (!is_shadow_present_pte(iter.old_spte) ||
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}
	return spte_set;
}

/*
 * Remove write access from all the SPTEs mapping GFNs in the memslot. Will
 * only affect leaf SPTEs down to min_level.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
			     int min_level)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
			     slot->base_gfn + slot->npages, min_level);

		kvm_mmu_put_root(kvm, root);
	}

	return spte_set;
}

/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
 */
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
				  gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		if (spte_ad_need_write_protect(iter.old_spte)) {
			if (is_writable_pte(iter.old_spte))
				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
			else
				continue;
		} else {
			if (iter.old_spte & shadow_dirty_mask)
				new_spte = iter.old_spte & ~shadow_dirty_mask;
			else
				continue;
		}

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}
	return spte_set;
}

/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
 */
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
				slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}

	return spte_set;
}

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
				  gfn_t gfn, unsigned long mask, bool wrprot)
{
	struct tdp_iter iter;
	u64 new_spte;

	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
				    gfn + BITS_PER_LONG) {
		if (!mask)
			break;

		if (iter.level > PG_LEVEL_4K ||
		    !(mask & (1UL << (iter.gfn - gfn))))
			continue;

		if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
			if (is_writable_pte(iter.old_spte))
				new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
			else
				continue;
		} else {
			if (iter.old_spte & shadow_dirty_mask)
				new_spte = iter.old_spte & ~shadow_dirty_mask;
			else
				continue;
		}

		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);

		mask &= ~(1UL << (iter.gfn - gfn));
	}
}
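
/*
 * Example of the gfn/mask contract above: bit i of mask corresponds to
 * GFN gfn + i, so with gfn = 0x1000 and mask = 0x5 only the SPTEs mapping
 * GFNs 0x1000 and 0x1002 are affected.
 */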

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 */
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
				       struct kvm_memory_slot *slot,
				       gfn_t gfn, unsigned long mask,
				       bool wrprot)
{
	struct kvm_mmu_page *root;
	int root_as_id;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
	}
}

/*
 * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
 * only used for PML, and so will involve setting the dirty bit on each SPTE.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
				gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		new_spte = iter.old_spte | shadow_dirty_mask;

		tdp_mmu_set_spte(kvm, &iter, new_spte);
		spte_set = true;

		tdp_mmu_iter_cond_resched(kvm, &iter);
	}

	return spte_set;
}

/*
 * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
 * only used for PML, and so will involve setting the dirty bit on each SPTE.
 * Returns true if an SPTE has been changed and the TLBs need to be flushed.
 */
bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
				slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}
	return spte_set;
}

/*
 * Clear non-leaf entries (and free associated page tables) which could
 * be replaced by large mappings, for GFNs within the slot.
 */
static void zap_collapsible_spte_range(struct kvm *kvm,
				       struct kvm_mmu_page *root,
				       gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	kvm_pfn_t pfn;
	bool spte_set = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte) ||
		    is_last_spte(iter.old_spte, iter.level))
			continue;

		pfn = spte_to_pfn(iter.old_spte);
		if (kvm_is_reserved_pfn(pfn) ||
		    !PageTransCompoundMap(pfn_to_page(pfn)))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
	}

	if (spte_set)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Clear non-leaf entries (and free associated page tables) which could
 * be replaced by large mappings, for GFNs within the slot.
 */
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
				       const struct kvm_memory_slot *slot)
{
	struct kvm_mmu_page *root;
	int root_as_id;

	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
					   slot->base_gfn + slot->npages);

		kvm_mmu_put_root(kvm, root);
	}
}

/*
 * Removes write access on the last level SPTE mapping this GFN and unsets the
 * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
 * Returns true if an SPTE was set and a TLB flush is needed.
 */
static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
			      gfn_t gfn)
{
	struct tdp_iter iter;
	u64 new_spte;
	bool spte_set = false;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
		if (!is_writable_pte(iter.old_spte))
			break;

		new_spte = iter.old_spte &
			~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);

		tdp_mmu_set_spte(kvm, &iter, new_spte);
		spte_set = true;
	}

	return spte_set;
}

/*
 * Removes write access on the last level SPTE mapping this GFN and unsets the
 * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
 * Returns true if an SPTE was set and a TLB flush is needed.
 */
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
				   struct kvm_memory_slot *slot, gfn_t gfn)
{
	struct kvm_mmu_page *root;
	int root_as_id;
	bool spte_set = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_tdp_mmu_root(kvm, root) {
		root_as_id = kvm_mmu_page_as_id(root);
		if (root_as_id != slot->as_id)
			continue;

		spte_set |= write_protect_gfn(kvm, root, gfn);
	}
	return spte_set;
}

/*
 * Return the level of the lowest level SPTE added to sptes.
 * That SPTE may be non-present.
 */
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
{
	struct tdp_iter iter;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	int leaf = vcpu->arch.mmu->shadow_root_level;
	gfn_t gfn = addr >> PAGE_SHIFT;

	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		leaf = iter.level;
		sptes[leaf - 1] = iter.old_spte;
	}

	return leaf;
}