// SPDX-License-Identifier: GPL-2.0

#include "mmu.h"
#include "mmu_internal.h"
#include "mmutrace.h"
#include "tdp_iter.h"
#include "tdp_mmu.h"
#include "spte.h"

static bool __read_mostly tdp_mmu_enabled = false;

static bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
	return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
#else
	return false;
#endif /* CONFIG_X86_64 */
}

/* Initializes the TDP MMU for the VM, if enabled. */
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
	if (!is_tdp_mmu_enabled())
		return;

	/* This should not be changed for the lifetime of the VM. */
	kvm->arch.tdp_mmu_enabled = true;

	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
	if (!kvm->arch.tdp_mmu_enabled)
		return;

	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}

#define for_each_tdp_mmu_root(_kvm, _root)			    \
	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
	struct kvm_mmu_page *sp;

	sp = to_shadow_page(hpa);

	return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end);

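/*
 * Frees a TDP MMU root. The caller must hold the MMU lock, and the root's
 * reference count must have dropped to zero. The paging structure under the
 * root is zapped before the root page itself is freed.
 */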
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);

	lockdep_assert_held(&kvm->mmu_lock);

	WARN_ON(root->root_count);
	WARN_ON(!root->tdp_mmu_page);

	list_del(&root->link);

	zap_gfn_range(kvm, root, 0, max_gfn);

	free_page((unsigned long)root->spt);
	kmem_cache_free(mmu_page_header_cache, root);
}

static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
						   int level)
{
	union kvm_mmu_page_role role;

	role = vcpu->arch.mmu->mmu_role.base;
	role.level = level;
	role.direct = true;
	role.gpte_is_8_bytes = true;
	role.access = ACC_ALL;

	return role;
}

static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
					       int level)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	sp->role.word = page_role_for_level(vcpu, level).word;
	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	return sp;
}

static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_page_role role;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *root;

	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

	spin_lock(&kvm->mmu_lock);

	/* Check for an existing root before allocating a new one. */
	for_each_tdp_mmu_root(kvm, root) {
		if (root->role.word == role.word) {
			kvm_mmu_get_root(kvm, root);
			spin_unlock(&kvm->mmu_lock);
			return root;
		}
	}

	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
	root->root_count = 1;

	list_add(&root->link, &kvm->arch.tdp_mmu_roots);

	spin_unlock(&kvm->mmu_lock);

	return root;
}

hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *root;

	root = get_tdp_mmu_vcpu_root(vcpu);
	if (!root)
		return INVALID_PAGE;

	return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level);

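/* Returns the address space ID of the paging structure @sp belongs to. */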
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return sp->role.smm ? 1 : 0;
}

/**
 * handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of a SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				  u64 old_spte, u64 new_spte, int level)
{
	bool was_present = is_shadow_present_pte(old_spte);
	bool is_present = is_shadow_present_pte(new_spte);
	bool was_leaf = was_present && is_last_spte(old_spte, level);
	bool is_leaf = is_present && is_last_spte(new_spte, level);
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
	u64 *pt;
	struct kvm_mmu_page *sp;
	u64 old_child_spte;
	int i;

	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
	WARN_ON(level < PG_LEVEL_4K);
	WARN_ON(gfn % KVM_PAGES_PER_HPAGE(level));

	/*
	 * If this warning were to trigger it would indicate that there was a
	 * missing MMU notifier or a race with some notifier handler.
	 * A present, leaf SPTE should never be directly replaced with another
	 * present leaf SPTE pointing to a different PFN. A notifier handler
	 * should be zapping the SPTE before the main MM's page table is
	 * changed, or the SPTE should be zeroed, and the TLBs flushed by the
	 * thread before replacement.
	 */
	if (was_leaf && is_leaf && pfn_changed) {
		pr_err("Invalid SPTE change: cannot replace a present leaf\n"
		       "SPTE with another present leaf SPTE mapping a\n"
		       "different PFN!\n"
		       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
		       as_id, gfn, old_spte, new_spte, level);

		/*
		 * Crash the host to prevent error propagation and guest data
		 * corruption.
		 */
		BUG();
	}

	if (old_spte == new_spte)
		return;

	/*
	 * The only time an SPTE should be changed from one non-present state
	 * to another is when an MMIO entry is installed, modified, or
	 * removed. In that case, there is nothing to do here.
	 */
	if (!was_present && !is_present) {
		/*
		 * If this change does not involve a MMIO SPTE, it is
		 * unexpected. Log the change, though it should not impact the
		 * guest since both the former and current SPTEs are nonpresent.
		 */
		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
			       "should not be replaced with another,\n"
			       "different nonpresent SPTE, unless one or both\n"
			       "are MMIO SPTEs.\n"
			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
			       as_id, gfn, old_spte, new_spte, level);
		return;
	}

	if (was_leaf && is_dirty_spte(old_spte) &&
	    (!is_dirty_spte(new_spte) || pfn_changed))
		kvm_set_pfn_dirty(spte_to_pfn(old_spte));

	/*
	 * Recursively handle child PTs if the change removed a subtree from
	 * the paging structure.
	 */
	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
		pt = spte_to_child_pt(old_spte, level);
		sp = sptep_to_sp(pt);

		list_del(&sp->link);

		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
			old_child_spte = READ_ONCE(*(pt + i));
			WRITE_ONCE(*(pt + i), 0);
			handle_changed_spte(kvm, as_id,
				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
				old_child_spte, 0, level - 1);
		}

		kvm_flush_remote_tlbs_with_address(kvm, gfn,
						   KVM_PAGES_PER_HPAGE(level));

		free_page((unsigned long)pt);
		kmem_cache_free(mmu_page_header_cache, sp);
	}
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
}

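/*
 * tdp_mmu_set_spte - write @new_spte to the SPTE the iterator currently
 * points at and handle the resulting bookkeeping via handle_changed_spte().
 * The caller must hold the MMU lock.
 */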
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	*iter->sptep = new_spte;

	handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			    iter->level);
}

#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

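/* Iterates over the SPTEs mapping GFNs in [_start, _end) under _mmu's root. */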
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
			 _mmu->shadow_root_level, _start, _end)

/*
 * Flush the TLBs and yield the MMU lock if the lock is contended or this
 * thread needs to be rescheduled. Returns false if the TLBs were flushed
 * and the walk restarted, true if the caller still needs to flush the TLBs.
 */
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
{
	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		kvm_flush_remote_tlbs(kvm);
		cond_resched_lock(&kvm->mmu_lock);
		tdp_iter_refresh_walk(iter);
		return false;
	} else {
		return true;
	}
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end)
{
	struct tdp_iter iter;
	bool flush_needed = false;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/*
		 * If this is a non-last-level SPTE that covers a larger range
		 * than should be zapped, continue, and zap the mappings at a
		 * lower level.
		 */
		if ((iter.gfn < start ||
		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);

		flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
	}
	return flush_needed;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 */
bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
{
	struct kvm_mmu_page *root;
	bool flush = false;

	for_each_tdp_mmu_root(kvm, root) {
		/*
		 * Take a reference on the root so that it cannot be freed if
		 * this thread releases the MMU lock and yields in this loop.
		 */
		kvm_mmu_get_root(kvm, root);

		flush |= zap_gfn_range(kvm, root, start, end);

		kvm_mmu_put_root(kvm, root);
	}

	return flush;
}

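/*
 * Zaps the entire guest physical address range supported by the host on
 * every TDP MMU root, flushing the TLBs if any SPTEs were cleared.
 */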
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
	bool flush;

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
					   int map_writable,
					   struct tdp_iter *iter,
					   kvm_pfn_t pfn, bool prefault)
{
	u64 new_spte;
	int ret = 0;
	int make_spte_ret = 0;

	if (unlikely(is_noslot_pfn(pfn))) {
		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
	} else
		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
					  pfn, iter->old_spte, prefault, true,
					  map_writable, !shadow_accessed_mask,
					  &new_spte);

	if (new_spte == iter->old_spte)
		ret = RET_PF_SPURIOUS;
	else
		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

	/*
	 * If the page fault was caused by a write but the page is write
	 * protected, emulation is needed. If the emulation was skipped,
	 * the vCPU would have the same fault again.
	 */
	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
		if (write)
			ret = RET_PF_EMULATE;
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}

	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
	if (unlikely(is_mmio_spte(new_spte)))
		ret = RET_PF_EMULATE;

	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	if (!prefault)
		vcpu->stat.pf_fixed++;

	return ret;
}

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
 */
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
		    int map_writable, int max_level, kvm_pfn_t pfn,
		    bool prefault)
{
	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	bool write = error_code & PFERR_WRITE_MASK;
	bool exec = error_code & PFERR_FETCH_MASK;
	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
	struct kvm_mmu *mmu = vcpu->arch.mmu;
	struct tdp_iter iter;
	struct kvm_mmu_page *sp;
	u64 *child_pt;
	u64 new_spte;
	int ret;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int level;
	int req_level;

	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;
	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
		return RET_PF_RETRY;

	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
					huge_page_disallowed, &req_level);

	trace_kvm_mmu_spte_requested(gpa, level, pfn);
	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
		if (nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(iter.old_spte, gfn,
						   iter.level, &pfn, &level);

		if (iter.level == level)
			break;

		/*
		 * If there is an SPTE mapping a large page at a higher level
		 * than the target, that SPTE must be cleared and replaced
		 * with a non-leaf SPTE.
		 */
		if (is_shadow_present_pte(iter.old_spte) &&
		    is_large_pte(iter.old_spte)) {
			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);

			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
					KVM_PAGES_PER_HPAGE(iter.level));

			/*
			 * The iter must explicitly re-read the spte here
			 * because the new value informs the !present
			 * path below.
			 */
			iter.old_spte = READ_ONCE(*iter.sptep);
		}

		if (!is_shadow_present_pte(iter.old_spte)) {
			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
			child_pt = sp->spt;
			clear_page(child_pt);
			new_spte = make_nonleaf_spte(child_pt,
						     !shadow_accessed_mask);

			trace_kvm_mmu_get_page(sp, true);
			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
		}
	}

	if (WARN_ON(iter.level != level))
		return RET_PF_RETRY;

	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
					      pfn, prefault);

	return ret;
}