// SPDX-License-Identifier: GPL-2.0

#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"

static bool __read_mostly tdp_mmu_enabled = false;

static bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
	return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
#else
	return false;
#endif /* CONFIG_X86_64 */
}

/* Initializes the TDP MMU for the VM, if enabled. */
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
	if (!is_tdp_mmu_enabled())
		return;

	/* This should not be changed for the lifetime of the VM. */
	kvm->arch.tdp_mmu_enabled = true;

	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
	if (!kvm->arch.tdp_mmu_enabled)
		return;

	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}

#define for_each_tdp_mmu_root(_kvm, _root)			\
	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
	struct kvm_mmu_page *sp;

	sp = to_shadow_page(hpa);

	return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield);

void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);

	lockdep_assert_held(&kvm->mmu_lock);

	WARN_ON(root->root_count);
	WARN_ON(!root->tdp_mmu_page);

	list_del(&root->link);

	zap_gfn_range(kvm, root, 0, max_gfn, false);

	free_page((unsigned long)root->spt);
	kmem_cache_free(mmu_page_header_cache, root);
}
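
/*
 * Worked example of the max_gfn calculation above (the numbers are
 * illustrative; x86_phys_bits varies by CPU): with 46 physical address
 * bits and PAGE_SHIFT == 12, max_gfn = 1ULL << 34, so the zap covers
 * every GFN the hardware could possibly map and the root is guaranteed
 * to be empty before it is freed.
 */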

static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
						   int level)
{
	union kvm_mmu_page_role role;

	role = vcpu->arch.mmu->mmu_role.base;
	role.level = level;
	role.direct = true;
	role.gpte_is_8_bytes = true;
	role.access = ACC_ALL;

	return role;
}

static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
					       int level)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	sp->role.word = page_role_for_level(vcpu, level).word;
	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	return sp;
}

static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_page_role role;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *root;

	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

	spin_lock(&kvm->mmu_lock);

	/* Check for an existing root before allocating a new one. */
	for_each_tdp_mmu_root(kvm, root) {
		if (root->role.word == role.word) {
			kvm_mmu_get_root(kvm, root);
			spin_unlock(&kvm->mmu_lock);
			return root;
		}
	}

	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
	root->root_count = 1;

	list_add(&root->link, &kvm->arch.tdp_mmu_roots);

	spin_unlock(&kvm->mmu_lock);

	return root;
}

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *root;

	root = get_tdp_mmu_vcpu_root(vcpu);
	if (!root)
		return INVALID_PAGE;

	return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level);

static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return sp->role.smm ? 1 : 0;
}

static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
		return;

	if (is_accessed_spte(old_spte) &&
	    (!is_accessed_spte(new_spte) || pfn_changed))
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
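
/*
 * Worked example of the accessed-bit bookkeeping above: if old_spte is a
 * present, last-level SPTE with the accessed bit set and new_spte is zero
 * (the SPTE was zapped), kvm_set_pfn_accessed() propagates the guest's
 * access to the primary MM before the backing page can be reclaimed. A
 * non-present or non-leaf old_spte is ignored entirely.
 */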

/**
 * handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of a SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				  u64 old_spte, u64 new_spte, int level)
{
	bool was_present = is_shadow_present_pte(old_spte);
	bool is_present = is_shadow_present_pte(new_spte);
	bool was_leaf = was_present && is_last_spte(old_spte, level);
	bool is_leaf = is_present && is_last_spte(new_spte, level);
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
	u64 *pt;
	struct kvm_mmu_page *sp;
	u64 old_child_spte;
	int i;

	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
	WARN_ON(level < PG_LEVEL_4K);
	WARN_ON(gfn % KVM_PAGES_PER_HPAGE(level));

	/*
	 * If this warning were to trigger it would indicate that there was a
	 * missing MMU notifier or a race with some notifier handler.
	 * A present, leaf SPTE should never be directly replaced with another
	 * present leaf SPTE pointing to a different PFN. A notifier handler
	 * should be zapping the SPTE before the main MM's page table is
	 * changed, or the SPTE should be zeroed, and the TLBs flushed by the
	 * thread before replacement.
	 */
	if (was_leaf && is_leaf && pfn_changed) {
		pr_err("Invalid SPTE change: cannot replace a present leaf\n"
		       "SPTE with another present leaf SPTE mapping a\n"
		       "different PFN!\n"
		       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
		       as_id, gfn, old_spte, new_spte, level);

		/*
		 * Crash the host to prevent error propagation and guest data
		 * corruption.
		 */
		BUG();
	}

	if (old_spte == new_spte)
		return;

	/*
	 * The only times a SPTE should be changed from a non-present to
	 * non-present state is when an MMIO entry is installed/modified/
	 * removed. In that case, there is nothing to do here.
	 */
	if (!was_present && !is_present) {
		/*
		 * If this change does not involve a MMIO SPTE, it is
		 * unexpected. Log the change, though it should not impact the
		 * guest since both the former and current SPTEs are nonpresent.
		 */
		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
			       "should not be replaced with another,\n"
			       "different nonpresent SPTE, unless one or both\n"
			       "are MMIO SPTEs.\n"
			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
			       as_id, gfn, old_spte, new_spte, level);
		return;
	}

	if (was_leaf && is_dirty_spte(old_spte) &&
	    (!is_dirty_spte(new_spte) || pfn_changed))
		kvm_set_pfn_dirty(spte_to_pfn(old_spte));

	/*
	 * Recursively handle child PTs if the change removed a subtree from
	 * the paging structure.
	 */
	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
		pt = spte_to_child_pt(old_spte, level);
		sp = sptep_to_sp(pt);

		list_del(&sp->link);

		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
			old_child_spte = READ_ONCE(*(pt + i));
			WRITE_ONCE(*(pt + i), 0);
			handle_changed_spte(kvm, as_id,
				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
				old_child_spte, 0, level - 1);
		}

		kvm_flush_remote_tlbs_with_address(kvm, gfn,
						   KVM_PAGES_PER_HPAGE(level));

		free_page((unsigned long)pt);
		kmem_cache_free(mmu_page_header_cache, sp);
	}
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
	handle_changed_spte_acc_track(old_spte, new_spte, level);
}

static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				      u64 new_spte, bool record_acc_track)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	WRITE_ONCE(*iter->sptep, new_spte);

	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			      iter->level);
	if (record_acc_track)
		handle_changed_spte_acc_track(iter->old_spte, new_spte,
					      iter->level);
}

static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true);
}

static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, false);
}

#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)	\
	tdp_root_for_each_pte(_iter, _root, _start, _end)	\
		if (!is_shadow_present_pte(_iter.old_spte) ||	\
		    !is_last_spte(_iter.old_spte, _iter.level))	\
			continue;				\
		else
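
/*
 * Illustrative sketch (not compiled) of how the iterator macros above are
 * used; test_age_gfn() below follows the same shape:
 *
 *	struct tdp_iter iter;
 *
 *	tdp_root_for_each_leaf_pte(iter, root, start, end)
 *		pr_info("leaf SPTE %llx at gfn %llx, level %d\n",
 *			iter.old_spte, iter.gfn, iter.level);
 *
 * The trailing if/continue/else in the leaf variant filters out non-present
 * and non-leaf entries, so the loop body only sees present, last-level SPTEs.
 */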

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
			 _mmu->shadow_root_level, _start, _end)

/*
 * Flush the TLB if the process should drop kvm->mmu_lock.
 * Return whether the caller still needs to flush the TLB.
 */
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		kvm_flush_remote_tlbs(kvm);
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
		return false;
	} else {
		return true;
	}
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 * If can_yield is true, will release the MMU lock and reschedule if the
 * scheduler needs the CPU or there is contention on the MMU lock. If this
 * function cannot yield, it will not release the MMU lock or reschedule and
 * the caller must ensure it does not supply too large a GFN range, or the
 * operation can cause a soft lockup.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield)
{
	struct tdp_iter iter;
	bool flush_needed = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/*
		 * If this is a non-last-level SPTE that covers a larger range
		 * than should be zapped, continue, and zap the mappings at a
		 * lower level.
		 */
		if ((iter.gfn < start ||
		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		if (can_yield)
			flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
		else
			flush_needed = true;
	}
	return flush_needed;
}
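
/*
 * Worked example of the skip logic above (the gfn values are illustrative):
 * when zapping [512, 1024) while a non-leaf SPTE at level 3 covers
 * [0, 262144), the SPTE spills far beyond the target range and is not a
 * leaf, so the walk descends instead of zapping the whole subtree; a 2 MiB
 * leaf at level 2 covering exactly [512, 1024) at the next level down is
 * then cleared with tdp_mmu_set_spte(kvm, &iter, 0).
 */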

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
{
	struct kvm_mmu_page *root;
	bool flush = false;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		flush |= zap_gfn_range(kvm, root, start, end, true);

		kvm_mmu_put_root(kvm, root);
	}

	return flush;
}

void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
	bool flush;

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
					   int map_writable,
					   struct tdp_iter *iter,
					   kvm_pfn_t pfn, bool prefault)
{
	u64 new_spte;
	int ret = 0;
	int make_spte_ret = 0;

	if (unlikely(is_noslot_pfn(pfn))) {
		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
	} else
		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
					  pfn, iter->old_spte, prefault, true,
					  map_writable, !shadow_accessed_mask,
					  &new_spte);

	if (new_spte == iter->old_spte)
		ret = RET_PF_SPURIOUS;
	else
		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

	/*
	 * If the page fault was caused by a write but the page is write
	 * protected, emulation is needed. If the emulation was skipped,
	 * the vCPU would have the same fault again.
	 */
	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
		if (write)
			ret = RET_PF_EMULATE;
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}

	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
	if (unlikely(is_mmio_spte(new_spte)))
		ret = RET_PF_EMULATE;

	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	if (!prefault)
		vcpu->stat.pf_fixed++;

	return ret;
}
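
/*
 * Illustrative flow through tdp_mmu_map_handle_target_level() for a fault
 * on a GFN with no backing memslot: is_noslot_pfn(pfn) is true, so an MMIO
 * SPTE is installed, is_mmio_spte(new_spte) then forces RET_PF_EMULATE,
 * and the caller emulates the access instead of retrying it.
 */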

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
 */
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
		    int map_writable, int max_level, kvm_pfn_t pfn,
		    bool prefault)
{
	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	bool write = error_code & PFERR_WRITE_MASK;
	bool exec = error_code & PFERR_FETCH_MASK;
	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	struct tdp_iter iter;
	struct kvm_mmu_page *sp;
	u64 *child_pt;
	u64 new_spte;
	int ret;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int level;
	int req_level;

	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;
	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;

	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
					huge_page_disallowed, &req_level);

	trace_kvm_mmu_spte_requested(gpa, level, pfn);
	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		if (nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(iter.old_spte, gfn,
						   iter.level, &pfn, &level);

		if (iter.level == level)
			break;

		/*
		 * If there is an SPTE mapping a large page at a higher level
		 * than the target, that SPTE must be cleared and replaced
		 * with a non-leaf SPTE.
		 */
		if (is_shadow_present_pte(iter.old_spte) &&
		    is_large_pte(iter.old_spte)) {
			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);

			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
					KVM_PAGES_PER_HPAGE(iter.level));

			/*
			 * The iter must explicitly re-read the spte here
			 * because the new value informs the !present
			 * path below.
			 */
			iter.old_spte = READ_ONCE(*iter.sptep);
		}

		if (!is_shadow_present_pte(iter.old_spte)) {
			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
			child_pt = sp->spt;
			clear_page(child_pt);
			new_spte = make_nonleaf_spte(child_pt,
						     !shadow_accessed_mask);

			trace_kvm_mmu_get_page(sp, true);
			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
		}
	}

	if (WARN_ON(iter.level != level))
		return RET_PF_RETRY;

	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
					      pfn, prefault);

	return ret;
}
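
/*
 * Sketch of a caller (illustrative only; the real call site lives in the
 * common page fault path in mmu.c and may differ in detail): once the
 * faulting PFN has been resolved, the handler routes TDP MMU roots here,
 * e.g.
 *
 *	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
 *		r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable,
 *				    max_level, pfn, prefault);
 */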

static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
		unsigned long end, unsigned long data,
		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
			       struct kvm_mmu_page *root, gfn_t start,
			       gfn_t end, unsigned long data))
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	struct kvm_mmu_page *root;
	int ret = 0;
	int as_id;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		as_id = kvm_mmu_page_as_id(root);
		slots = __kvm_memslots(kvm, as_id);
		kvm_for_each_memslot(memslot, slots) {
			unsigned long hva_start, hva_end;
			gfn_t gfn_start, gfn_end;

			hva_start = max(start, memslot->userspace_addr);
			hva_end = min(end, memslot->userspace_addr +
				      (memslot->npages << PAGE_SHIFT));
			if (hva_start >= hva_end)
				continue;
			/*
			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
			 */
			gfn_start = hva_to_gfn_memslot(hva_start, memslot);
			gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

			ret |= handler(kvm, memslot, root, gfn_start,
				       gfn_end, data);
		}

		kvm_mmu_put_root(kvm, root);
	}

	return ret;
}

static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
				     struct kvm_memory_slot *slot,
				     struct kvm_mmu_page *root, gfn_t start,
				     gfn_t end, unsigned long unused)
{
	return zap_gfn_range(kvm, root, start, end, false);
}

int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    zap_gfn_range_hva_wrapper);
}
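
/*
 * Worked example of the hva-to-gfn clamping above (the numbers are
 * illustrative): a memslot with userspace_addr = 0x7f0000000000,
 * npages = 512 and base gfn 0x1000, intersected with the hva range
 * [0x7f0000001000, 0x7f0000003000), yields gfn_start = 0x1001 and
 * gfn_end = 0x1003, i.e. the handler sees exactly the two GFNs whose
 * pages intersect the hva range.
 */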

/*
 * Mark SPTEs in the range of GFNs, [start, end), unaccessed and return
 * non-zero if any of the GFNs in the range have been accessed.
 */
static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
			 struct kvm_mmu_page *root, gfn_t start, gfn_t end,
			 unsigned long unused)
{
	struct tdp_iter iter;
	int young = 0;
	u64 new_spte = 0;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		/*
		 * If we have a non-accessed entry we don't need to change the
		 * pte.
		 */
		if (!is_accessed_spte(iter.old_spte))
			continue;

		new_spte = iter.old_spte;

		if (spte_ad_enabled(new_spte)) {
			clear_bit((ffs(shadow_accessed_mask) - 1),
				  (unsigned long *)&new_spte);
		} else {
			/*
			 * Capture the dirty status of the page, so that it
			 * doesn't get lost when the SPTE is marked for access
			 * tracking.
			 */
			if (is_writable_pte(new_spte))
				kvm_set_pfn_dirty(spte_to_pfn(new_spte));

			new_spte = mark_spte_for_access_track(new_spte);
		}

		tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
		young = 1;
	}

	return young;
}

int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
					    age_gfn_range);
}

static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long unused2)
{
	struct tdp_iter iter;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1)
		if (is_accessed_spte(iter.old_spte))
			return 1;

	return 0;
}

int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
					    test_age_gfn);
}
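
/*
 * Worked example of the accessed-bit clear in age_gfn_range() (the bit
 * position assumes EPT with A/D bits enabled and is illustrative):
 * shadow_accessed_mask is 1ull << 8, so ffs(shadow_accessed_mask) - 1 == 8
 * and clear_bit() drops exactly that bit from the local copy of the SPTE.
 * Without A/D bits, the SPTE instead goes through
 * mark_spte_for_access_track(), which preserves enough state to restore
 * the PTE on the next access.
 */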