// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long __ro_after_init hyp_idmap_start;
static unsigned long __ro_after_init hyp_idmap_end;
static phys_addr_t __ro_after_init hyp_idmap_vector;

static unsigned long __ro_after_init io_map_base;

static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
					   phys_addr_t size)
{
	phys_addr_t boundary = ALIGN_DOWN(addr + size, size);

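	/*
	 * Subtracting 1 from both sides keeps the comparison correct when
	 * @end is 0, i.e. when the range runs to the very top of the
	 * address space and the end address wraps around.
	 */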
	return (boundary - 1 < end - 1) ? boundary : end;
}

static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);

	return __stage2_range_addr_end(addr, end, size);
}

/*
 * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
 * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
 * CONFIG_LOCKUP_DETECTOR, CONFIG_LOCKDEP. Additionally, holding the lock too
 * long will also starve other vCPUs. We have to also make sure that the page
 * tables are not freed while we release the lock.
 */
static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = mmu->pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_range_addr_end(addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_rwlock_write(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}

#define stage2_apply_range_resched(mmu, addr, end, fn)			\
	stage2_apply_range(mmu, addr, end, fn, true)

/*
 * Get the maximum number of page-table pages needed to split a range
 * of blocks into PAGE_SIZE PTEs. It assumes the range is already
 * mapped at level 2, or at level 1 if allowed.
 */
static int kvm_mmu_split_nr_page_tables(u64 range)
{
	int n = 0;

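	/*
	 * Splitting a level-1 (PUD) block needs one page for the new
	 * level-2 table, and splitting each level-2 (PMD) block needs one
	 * page for the new level-3 table, so count one page per block-sized
	 * chunk of the range at each level.
	 */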
	if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2)
		n += DIV_ROUND_UP(range, PUD_SIZE);
	n += DIV_ROUND_UP(range, PMD_SIZE);
	return n;
}

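/*
 * Decide whether kvm_mmu_split_huge_pages() should drop the mmu_lock:
 * either a reschedule is due, or the memcache no longer holds enough
 * pages to split one full chunk.
 */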
static bool need_split_memcache_topup_or_resched(struct kvm *kvm)
{
	struct kvm_mmu_memory_cache *cache;
	u64 chunk_size, min;

	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
		return true;

	chunk_size = kvm->arch.mmu.split_page_chunk_size;
	min = kvm_mmu_split_nr_page_tables(chunk_size);
	cache = &kvm->arch.mmu.split_page_cache;
	return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
}

static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
				    phys_addr_t end)
{
	struct kvm_mmu_memory_cache *cache;
	struct kvm_pgtable *pgt;
	int ret, cache_capacity;
	u64 next, chunk_size;

	lockdep_assert_held_write(&kvm->mmu_lock);

	chunk_size = kvm->arch.mmu.split_page_chunk_size;
	cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size);

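	/* A chunk size of 0 means eager page splitting is disabled. */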
	if (chunk_size == 0)
		return 0;

	cache = &kvm->arch.mmu.split_page_cache;

	do {
		if (need_split_memcache_topup_or_resched(kvm)) {
			write_unlock(&kvm->mmu_lock);
			cond_resched();
			/* Eager page splitting is best-effort. */
			ret = __kvm_mmu_topup_memory_cache(cache,
							   cache_capacity,
							   cache_capacity);
			write_lock(&kvm->mmu_lock);
			if (ret)
				break;
		}

		pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = __stage2_range_addr_end(addr, end, chunk_size);
		ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
		if (ret)
			break;
	} while (addr = next, addr != end);

	return ret;
}

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
	return 0;
}

int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
				     gfn_t gfn, u64 nr_pages)
{
	kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
				 gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
	return 0;
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_is_map_memory(pfn);
}

static void *stage2_memcache_zalloc_page(void *arg)
{
	struct kvm_mmu_memory_cache *mc = arg;
	void *virt;

	/* Allocated with __GFP_ZERO, so no need to zero */
	virt = kvm_mmu_memory_cache_alloc(mc);
	if (virt)
		kvm_account_pgtable_pages(virt, 1);
	return virt;
}

static void *kvm_host_zalloc_pages_exact(size_t size)
{
	return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}

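/*
 * Stage-2 table pages are charged via kvm_account_pgtable_pages() so that
 * KVM's guest page-table footprint shows up in the host's page-table
 * accounting.
 */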
static void *kvm_s2_zalloc_pages_exact(size_t size)
{
	void *virt = kvm_host_zalloc_pages_exact(size);

	if (virt)
		kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT));
	return virt;
}

static void kvm_s2_free_pages_exact(void *virt, size_t size)
{
	kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT));
	free_pages_exact(virt, size);
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;

static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
{
	struct page *page = container_of(head, struct page, rcu_head);
	void *pgtable = page_to_virt(page);
	u32 level = page_private(page);

	kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
}

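/*
 * Defer freeing of an unlinked table until after an RCU grace period, so
 * that concurrent table walkers are guaranteed to have moved on. The
 * table's level is stashed in page_private() for the RCU callback.
 */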
static void stage2_free_unlinked_table(void *addr, u32 level)
{
	struct page *page = virt_to_page(addr);

	set_page_private(page, (unsigned long)level);
	call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb);
}

static void kvm_host_get_page(void *addr)
{
	get_page(virt_to_page(addr));
}

static void kvm_host_put_page(void *addr)
{
	put_page(virt_to_page(addr));
}

static void kvm_s2_put_page(void *addr)
{
	struct page *p = virt_to_page(addr);
	/* Dropping last refcount, the page will be freed */
	if (page_count(p) == 1)
		kvm_account_pgtable_pages(addr, -1);
	put_page(p);
}

static int kvm_host_page_count(void *addr)
{
	return page_count(virt_to_page(addr));
}

static phys_addr_t kvm_host_pa(void *addr)
{
	return __pa(addr);
}

static void *kvm_host_va(phys_addr_t phys)
{
	return __va(phys);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(va, size);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(va, size);
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM. However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we flush to make sure the IO subsystem will
 * never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
/**
 * __unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @mmu:   The KVM stage-2 MMU pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 * @may_block: Whether or not we are permitted to block
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	phys_addr_t end = start + size;

	lockdep_assert_held_write(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx, bkt;

	idx = srcu_read_lock(&kvm->srcu);
	write_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, bkt, slots)
		stage2_flush_memslot(kvm, memslot);

	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 */
void __init free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
		hyp_pgtable = NULL;
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static bool kvm_host_owns_hyp_mappings(void)
{
	if (is_kernel_in_hyp_mode())
		return false;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return false;

	/*
	 * This can happen at boot time when __create_hyp_mappings() is called
	 * after the hyp protection has been enabled, but the static key has
	 * not been flipped yet.
	 */
	if (!hyp_pgtable && is_protected_kvm_enabled())
		return false;

	WARN_ON(!hyp_pgtable);

	return true;
}

int __create_hyp_mappings(unsigned long start, unsigned long size,
			  unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	if (WARN_ON(!kvm_host_owns_hyp_mappings()))
		return -EINVAL;

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

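/*
 * A page can be shared with the hypervisor on behalf of more than one data
 * structure, so keep a per-pfn refcount in an rbtree: only the first share
 * and the last unshare actually issue a hypercall.
 */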
struct hyp_shared_pfn {
	u64 pfn;
	int count;
	struct rb_node node;
};

static DEFINE_MUTEX(hyp_shared_pfns_lock);
static struct rb_root hyp_shared_pfns = RB_ROOT;

static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
					      struct rb_node **parent)
{
	struct hyp_shared_pfn *this;

	*node = &hyp_shared_pfns.rb_node;
	*parent = NULL;
	while (**node) {
		this = container_of(**node, struct hyp_shared_pfn, node);
		*parent = **node;
		if (this->pfn < pfn)
			*node = &((**node)->rb_left);
		else if (this->pfn > pfn)
			*node = &((**node)->rb_right);
		else
			return this;
	}

	return NULL;
}

static int share_pfn_hyp(u64 pfn)
{
	struct rb_node **node, *parent;
	struct hyp_shared_pfn *this;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	this = find_shared_pfn(pfn, &node, &parent);
	if (this) {
		this->count++;
		goto unlock;
	}

	this = kzalloc(sizeof(*this), GFP_KERNEL);
	if (!this) {
		ret = -ENOMEM;
		goto unlock;
	}

	this->pfn = pfn;
	this->count = 1;
	rb_link_node(&this->node, parent, node);
	rb_insert_color(&this->node, &hyp_shared_pfns);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
unlock:
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}

static int unshare_pfn_hyp(u64 pfn)
{
	struct rb_node **node, *parent;
	struct hyp_shared_pfn *this;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	this = find_shared_pfn(pfn, &node, &parent);
	if (WARN_ON(!this)) {
		ret = -ENOENT;
		goto unlock;
	}

	this->count--;
	if (this->count)
		goto unlock;

	rb_erase(&this->node, &hyp_shared_pfns);
	kfree(this);
	ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
unlock:
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}

int kvm_share_hyp(void *from, void *to)
{
	phys_addr_t start, end, cur;
	u64 pfn;
	int ret;

	if (is_kernel_in_hyp_mode())
		return 0;

	/*
	 * The share hcall maps things in the 'fixed-offset' region of the hyp
	 * VA space, so we can only share physically contiguous data-structures
	 * for now.
	 */
	if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
		return -EINVAL;

	if (kvm_host_owns_hyp_mappings())
		return create_hyp_mappings(from, to, PAGE_HYP);

	start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
	end = PAGE_ALIGN(__pa(to));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		pfn = __phys_to_pfn(cur);
		ret = share_pfn_hyp(pfn);
		if (ret)
			return ret;
	}

	return 0;
}

void kvm_unshare_hyp(void *from, void *to)
{
	phys_addr_t start, end, cur;
	u64 pfn;

	if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
		return;

	start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
	end = PAGE_ALIGN(__pa(to));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		pfn = __phys_to_pfn(cur);
		WARN_ON(unshare_pfn_hyp(pfn));
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	if (!kvm_host_owns_hyp_mappings())
		return -EPERM;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}

static int __hyp_alloc_private_va_range(unsigned long base)
{
	lockdep_assert_held(&kvm_hyp_pgd_mutex);

	if (!PAGE_ALIGNED(base))
		return -EINVAL;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
	 * allocating the new area, as it would indicate we've
	 * overflowed the idmap/IO address range.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		return -ENOMEM;

	io_map_base = base;

	return 0;
}

/**
 * hyp_alloc_private_va_range - Allocates a private VA range.
 * @size:	The size of the VA range to reserve.
 * @haddr:	The hypervisor virtual start address of the allocation.
 *
 * The private virtual address (VA) range is allocated below io_map_base
 * and aligned based on the order of @size.
 *
 * Return: 0 on success or negative error code on failure.
 */
int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * This assumes that we have enough space below the idmap
	 * page to allocate our VAs. If not, the check in
	 * __hyp_alloc_private_va_range() will kick in. A potential
	 * alternative would be to detect that overflow and switch
	 * to an allocation above the idmap.
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size);
	base = io_map_base - size;
	ret = __hyp_alloc_private_va_range(base);

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (!ret)
		*haddr = base;

	return ret;
}

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long addr;
	int ret = 0;

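	/*
	 * When the host no longer owns the hyp page tables (protected mode),
	 * delegate to EL2, which picks a private VA and creates the mapping
	 * itself.
	 */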
	if (!kvm_host_owns_hyp_mappings()) {
		addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_VALUE(addr))
			return addr;
		*haddr = addr;

		return 0;
	}

	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	ret = hyp_alloc_private_va_range(size, &addr);
	if (ret)
		return ret;

	ret = __create_hyp_mappings(addr, size, phys_addr, prot);
	if (ret)
		return ret;

	*haddr = addr + offset_in_page(phys_addr);
	return ret;
}

int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
{
	unsigned long base;
	size_t size;
	int ret;

	mutex_lock(&kvm_hyp_pgd_mutex);
	/*
	 * Efficient stack verification using the PAGE_SHIFT bit implies
	 * an alignment of our allocation on the order of the size.
	 */
	size = PAGE_SIZE * 2;
	base = ALIGN_DOWN(io_map_base - size, size);

	ret = __hyp_alloc_private_va_range(base);

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret) {
		kvm_err("Cannot allocate hyp stack guard page\n");
		return ret;
	}

	/*
	 * Since the stack grows downwards, map the stack to the page
	 * at the higher address and leave the lower guard page
	 * unbacked.
	 *
	 * Any valid stack address now has the PAGE_SHIFT bit as 1
	 * and addresses corresponding to the guard page have the
	 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
	 */
	ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
				    PAGE_HYP);
	if (ret)
		kvm_err("Cannot map hyp stack\n");

	*haddr = base + size;

	return ret;
}

/**
 * create_hyp_io_mappings - Map IO into both kernel and HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	if (is_protected_kvm_enabled())
		return -EPERM;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

/**
 * create_hyp_exec_mappings - Map an executable range into HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
	/* We shouldn't need any other callback to walk the PT */
	.phys_to_virt		= kvm_host_va,
};

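/*
 * Walk the current task's userspace page tables with the generic
 * page-table walker to find the size of the block or page mapping that
 * backs @addr. Returns the mapping size in bytes on success, or a
 * negative error code (-EAGAIN if the userspace PTE has gone away).
 */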
static int get_user_mapping_size(struct kvm *kvm, u64 addr)
{
	struct kvm_pgtable pgt = {
		.pgd		= (kvm_pteref_t)kvm->mm->pgd,
		.ia_bits	= vabits_actual,
		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
				   CONFIG_PGTABLE_LEVELS),
		.mm_ops		= &kvm_user_mm_ops,
	};
	unsigned long flags;
	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
	u32 level = ~0;
	int ret;

	/*
	 * Disable IRQs so that we hazard against a concurrent
	 * teardown of the userspace page tables (which relies on
	 * IPI-ing threads).
	 */
	local_irq_save(flags);
	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
	local_irq_restore(flags);

	if (ret)
		return ret;

	/*
	 * Not seeing an error, but not updating level? Something went
	 * deeply wrong...
	 */
	if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EFAULT;

	/* Oops, the userspace PTs are gone... Replay the fault */
	if (!kvm_pte_valid(pte))
		return -EAGAIN;

	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_s2_zalloc_pages_exact,
	.free_pages_exact	= kvm_s2_free_pages_exact,
	.free_unlinked_table	= stage2_free_unlinked_table,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_s2_put_page,
	.page_count		= kvm_host_page_count,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
	.dcache_clean_inval_poc	= clean_dcache_guest_page,
	.icache_inval_pou	= invalidate_icache_guest_page,
};

/**
 * kvm_init_stage2_mmu - Initialise an S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 * @type:	The machine type of the virtual machine
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
{
	u32 kvm_ipa_limit = get_kvm_ipa_limit();
	int cpu, err;
	struct kvm_pgtable *pgt;
	u64 mmfr0, mmfr1;
	u32 phys_shift;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
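	/*
	 * Protected VMs ignore the requested IPA size and always use the
	 * host's IPA limit; otherwise the requested size is validated
	 * against the supported range.
	 */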
	if (is_protected_kvm_enabled()) {
		phys_shift = kvm_ipa_limit;
	} else if (phys_shift) {
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < ARM64_MIN_PARANGE_BITS)
			return -EINVAL;
	} else {
		phys_shift = KVM_PHYS_SHIFT;
		if (phys_shift > kvm_ipa_limit) {
			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
				     current->comm);
			return -EINVAL;
		}
	}

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
	if (!pgt)
		return -ENOMEM;

	mmu->arch = &kvm->arch;
	err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	/* The eager page splitting is disabled by default */
	mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
	mmu->split_page_cache.gfp_zero = __GFP_ZERO;

	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}

void kvm_uninit_stage2_mmu(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
	kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 * |               memory region                    |
	 * +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma;
		hva_t vm_start, vm_end;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx, bkt;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	write_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, bkt, slots)
		stage2_unmap_memslot(kvm, memslot);

	write_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}

void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	struct kvm_pgtable *pgt = NULL;

1014fcc5bf89SJing Zhang write_lock(&kvm->mmu_lock);
101571233d05SWill Deacon pgt = mmu->pgt;
101671233d05SWill Deacon if (pgt) {
101771233d05SWill Deacon mmu->pgd_phys = 0;
101871233d05SWill Deacon mmu->pgt = NULL;
101971233d05SWill Deacon free_percpu(mmu->last_vcpu_ran);
10209ed24f4bSMarc Zyngier }
1021fcc5bf89SJing Zhang write_unlock(&kvm->mmu_lock);
10229ed24f4bSMarc Zyngier
102371233d05SWill Deacon if (pgt) {
102471233d05SWill Deacon kvm_pgtable_stage2_destroy(pgt);
102571233d05SWill Deacon kfree(pgt);
1026a0e50aa3SChristoffer Dall }
10279ed24f4bSMarc Zyngier }
10289ed24f4bSMarc Zyngier
hyp_mc_free_fn(void * addr,void * unused)1029717a7eebSQuentin Perret static void hyp_mc_free_fn(void *addr, void *unused)
1030717a7eebSQuentin Perret {
1031717a7eebSQuentin Perret free_page((unsigned long)addr);
1032717a7eebSQuentin Perret }
1033717a7eebSQuentin Perret
hyp_mc_alloc_fn(void * unused)1034717a7eebSQuentin Perret static void *hyp_mc_alloc_fn(void *unused)
1035717a7eebSQuentin Perret {
1036717a7eebSQuentin Perret return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
1037717a7eebSQuentin Perret }
1038717a7eebSQuentin Perret
free_hyp_memcache(struct kvm_hyp_memcache * mc)1039717a7eebSQuentin Perret void free_hyp_memcache(struct kvm_hyp_memcache *mc)
1040717a7eebSQuentin Perret {
1041717a7eebSQuentin Perret if (is_protected_kvm_enabled())
1042717a7eebSQuentin Perret __free_hyp_memcache(mc, hyp_mc_free_fn,
1043717a7eebSQuentin Perret kvm_host_va, NULL);
1044717a7eebSQuentin Perret }
1045717a7eebSQuentin Perret
topup_hyp_memcache(struct kvm_hyp_memcache * mc,unsigned long min_pages)1046717a7eebSQuentin Perret int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
1047717a7eebSQuentin Perret {
1048717a7eebSQuentin Perret if (!is_protected_kvm_enabled())
1049717a7eebSQuentin Perret return 0;
1050717a7eebSQuentin Perret
1051717a7eebSQuentin Perret return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
1052717a7eebSQuentin Perret kvm_host_pa, NULL);
1053717a7eebSQuentin Perret }
1054717a7eebSQuentin Perret
10559ed24f4bSMarc Zyngier /**
10569ed24f4bSMarc Zyngier * kvm_phys_addr_ioremap - map a device range to guest IPA
10579ed24f4bSMarc Zyngier *
10589ed24f4bSMarc Zyngier * @kvm: The KVM pointer
10599ed24f4bSMarc Zyngier * @guest_ipa: The IPA at which to insert the mapping
10609ed24f4bSMarc Zyngier * @pa: The physical address of the device
10619ed24f4bSMarc Zyngier * @size: The size of the mapping
1062c9c0279cSXiaofei Tan * @writable: Whether or not to create a writable mapping
10639ed24f4bSMarc Zyngier */
kvm_phys_addr_ioremap(struct kvm * kvm,phys_addr_t guest_ipa,phys_addr_t pa,unsigned long size,bool writable)10649ed24f4bSMarc Zyngier int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
10659ed24f4bSMarc Zyngier phys_addr_t pa, unsigned long size, bool writable)
10669ed24f4bSMarc Zyngier {
106702bbd374SWill Deacon phys_addr_t addr;
10689ed24f4bSMarc Zyngier int ret = 0;
1069837f66c7SDavid Matlack struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
107002bbd374SWill Deacon struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
107102bbd374SWill Deacon enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
107202bbd374SWill Deacon KVM_PGTABLE_PROT_R |
107302bbd374SWill Deacon (writable ? KVM_PGTABLE_PROT_W : 0);
10749ed24f4bSMarc Zyngier
1075bff01cb6SQuentin Perret if (is_protected_kvm_enabled())
1076bff01cb6SQuentin Perret return -EPERM;
1077bff01cb6SQuentin Perret
107802bbd374SWill Deacon size += offset_in_page(guest_ipa);
107902bbd374SWill Deacon guest_ipa &= PAGE_MASK;
10809ed24f4bSMarc Zyngier
108102bbd374SWill Deacon for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
1082c1a33aebSSean Christopherson ret = kvm_mmu_topup_memory_cache(&cache,
108361ffb3a5SSean Christopherson kvm_mmu_cache_min_pages(kvm));
10849ed24f4bSMarc Zyngier if (ret)
108502bbd374SWill Deacon break;
108602bbd374SWill Deacon
1087fcc5bf89SJing Zhang write_lock(&kvm->mmu_lock);
108802bbd374SWill Deacon ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
10891577cb58SOliver Upton &cache, 0);
1090fcc5bf89SJing Zhang write_unlock(&kvm->mmu_lock);
10919ed24f4bSMarc Zyngier if (ret)
109202bbd374SWill Deacon break;
10939ed24f4bSMarc Zyngier
109402bbd374SWill Deacon pa += PAGE_SIZE;
10959ed24f4bSMarc Zyngier }
10969ed24f4bSMarc Zyngier
1097c1a33aebSSean Christopherson kvm_mmu_free_memory_cache(&cache);
10989ed24f4bSMarc Zyngier return ret;
10999ed24f4bSMarc Zyngier }
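/*
 * A typical caller is the GICv2 emulation, which uses this helper to map
 * the physical GIC virtual CPU interface into the guest IPA space chosen
 * by the VMM, one page at a time and with a writable mapping.
 */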
11009ed24f4bSMarc Zyngier
11019ed24f4bSMarc Zyngier /**
11029ed24f4bSMarc Zyngier * stage2_wp_range() - write protect stage2 memory region range
1103c9c0279cSXiaofei Tan * @mmu: The KVM stage-2 MMU pointer
11049ed24f4bSMarc Zyngier * @addr: Start address of range
11059ed24f4bSMarc Zyngier * @end: End address of range
11069ed24f4bSMarc Zyngier */
1107a0e50aa3SChristoffer Dall static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
11089ed24f4bSMarc Zyngier {
11098531bd63SMarc Zyngier stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
11109ed24f4bSMarc Zyngier }
11119ed24f4bSMarc Zyngier
11129ed24f4bSMarc Zyngier /**
11139ed24f4bSMarc Zyngier * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
11149ed24f4bSMarc Zyngier * @kvm: The KVM pointer
11159ed24f4bSMarc Zyngier * @slot: The memory slot to write protect
11169ed24f4bSMarc Zyngier *
11179ed24f4bSMarc Zyngier * Called to start logging dirty pages after memory region
11189ed24f4bSMarc Zyngier * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
11199ed24f4bSMarc Zyngier * all present PUD, PMD and PTE entries are write-protected in the memory region.
11209ed24f4bSMarc Zyngier * Afterwards, the dirty page log can be read.
11219ed24f4bSMarc Zyngier *
11229ed24f4bSMarc Zyngier * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
11239ed24f4bSMarc Zyngier * serializing operations for VM memory regions.
11249ed24f4bSMarc Zyngier */
1125eab62148SGavin Shan static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
11269ed24f4bSMarc Zyngier {
11279ed24f4bSMarc Zyngier struct kvm_memslots *slots = kvm_memslots(kvm);
11289ed24f4bSMarc Zyngier struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
11299ed24f4bSMarc Zyngier phys_addr_t start, end;
11309ed24f4bSMarc Zyngier
11319ed24f4bSMarc Zyngier if (WARN_ON_ONCE(!memslot))
11329ed24f4bSMarc Zyngier return;
11339ed24f4bSMarc Zyngier
11349ed24f4bSMarc Zyngier start = memslot->base_gfn << PAGE_SHIFT;
11359ed24f4bSMarc Zyngier end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
11369ed24f4bSMarc Zyngier
1137fcc5bf89SJing Zhang write_lock(&kvm->mmu_lock);
1138a0e50aa3SChristoffer Dall stage2_wp_range(&kvm->arch.mmu, start, end);
1139fcc5bf89SJing Zhang write_unlock(&kvm->mmu_lock);
11403756b6f2SRaghavendra Rao Ananta kvm_flush_remote_tlbs_memslot(kvm, memslot);
11419ed24f4bSMarc Zyngier }
11429ed24f4bSMarc Zyngier
11439ed24f4bSMarc Zyngier /**
1144e7bf7a49SRicardo Koller * kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE
1145e7bf7a49SRicardo Koller * pages for a memory slot
1146e7bf7a49SRicardo Koller * @kvm: The KVM pointer
1147e7bf7a49SRicardo Koller * @slot: The memory slot to split
1148e7bf7a49SRicardo Koller *
1149e7bf7a49SRicardo Koller * Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
1150e7bf7a49SRicardo Koller * serializing operations for VM memory regions.
1151e7bf7a49SRicardo Koller */
1152e7bf7a49SRicardo Koller static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot)
1153e7bf7a49SRicardo Koller {
1154e7bf7a49SRicardo Koller struct kvm_memslots *slots;
1155e7bf7a49SRicardo Koller struct kvm_memory_slot *memslot;
1156e7bf7a49SRicardo Koller phys_addr_t start, end;
1157e7bf7a49SRicardo Koller
1158e7bf7a49SRicardo Koller lockdep_assert_held(&kvm->slots_lock);
1159e7bf7a49SRicardo Koller
1160e7bf7a49SRicardo Koller slots = kvm_memslots(kvm);
1161e7bf7a49SRicardo Koller memslot = id_to_memslot(slots, slot);
1162e7bf7a49SRicardo Koller
1163e7bf7a49SRicardo Koller start = memslot->base_gfn << PAGE_SHIFT;
1164e7bf7a49SRicardo Koller end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
1165e7bf7a49SRicardo Koller
1166e7bf7a49SRicardo Koller write_lock(&kvm->mmu_lock);
1167e7bf7a49SRicardo Koller kvm_mmu_split_huge_pages(kvm, start, end);
1168e7bf7a49SRicardo Koller write_unlock(&kvm->mmu_lock);
1169e7bf7a49SRicardo Koller }
1170e7bf7a49SRicardo Koller
11719ed24f4bSMarc Zyngier /*
11723005f6f2SRicardo Koller * kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages.
11733005f6f2SRicardo Koller * @kvm: The KVM pointer
11743005f6f2SRicardo Koller * @slot: The memory slot associated with mask
11753005f6f2SRicardo Koller * @gfn_offset: The gfn offset in memory slot
11763005f6f2SRicardo Koller * @mask: The mask of pages at offset 'gfn_offset' in this memory
11773005f6f2SRicardo Koller * slot to enable dirty logging on
11789ed24f4bSMarc Zyngier *
11796acf5166SRicardo Koller * Write-protects the selected pages to enable dirty logging, and then
11806acf5166SRicardo Koller * splits them to PAGE_SIZE. Caller must acquire kvm->mmu_lock.
11819ed24f4bSMarc Zyngier */
11829ed24f4bSMarc Zyngier void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
11839ed24f4bSMarc Zyngier struct kvm_memory_slot *slot,
11849ed24f4bSMarc Zyngier gfn_t gfn_offset, unsigned long mask)
11859ed24f4bSMarc Zyngier {
11863005f6f2SRicardo Koller phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
11873005f6f2SRicardo Koller phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
11883005f6f2SRicardo Koller phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
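	/*
	 * Worked example: with gfn_offset == 0 and mask == 0x3c (bits 2-5
	 * set), __ffs(mask) == 2 and __fls(mask) == 5, so the [start, end)
	 * range computed above spans the four pages base_gfn + 2 to
	 * base_gfn + 5, shifted by PAGE_SHIFT.
	 */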
11893005f6f2SRicardo Koller
11903005f6f2SRicardo Koller lockdep_assert_held_write(&kvm->mmu_lock);
11913005f6f2SRicardo Koller
11923005f6f2SRicardo Koller stage2_wp_range(&kvm->arch.mmu, start, end);
11936acf5166SRicardo Koller
11946acf5166SRicardo Koller /*
11956acf5166SRicardo Koller * Eager-splitting is done when manual-protect is set. We
11966acf5166SRicardo Koller * also check for initially-all-set because we can avoid
11976acf5166SRicardo Koller * eager-splitting if initially-all-set is false.
11986acf5166SRicardo Koller * If initially-all-set is false, the huge pages were already
11996acf5166SRicardo Koller * split when dirty logging was enabled: no need to do it
12006acf5166SRicardo Koller * again.
12016acf5166SRicardo Koller */
12026acf5166SRicardo Koller if (kvm_dirty_log_manual_protect_and_init_set(kvm))
12036acf5166SRicardo Koller kvm_mmu_split_huge_pages(kvm, start, end);
12049ed24f4bSMarc Zyngier }
12059ed24f4bSMarc Zyngier
12069ed24f4bSMarc Zyngier static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
12079ed24f4bSMarc Zyngier {
12089ed24f4bSMarc Zyngier send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
12099ed24f4bSMarc Zyngier }
12109ed24f4bSMarc Zyngier
12119ed24f4bSMarc Zyngier static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
12129ed24f4bSMarc Zyngier unsigned long hva,
12139ed24f4bSMarc Zyngier unsigned long map_size)
12149ed24f4bSMarc Zyngier {
12159ed24f4bSMarc Zyngier gpa_t gpa_start;
12169ed24f4bSMarc Zyngier hva_t uaddr_start, uaddr_end;
12179ed24f4bSMarc Zyngier size_t size;
12189ed24f4bSMarc Zyngier
12199f283614SSuzuki K Poulose /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
12209f283614SSuzuki K Poulose if (map_size == PAGE_SIZE)
12219f283614SSuzuki K Poulose return true;
12229f283614SSuzuki K Poulose
12239ed24f4bSMarc Zyngier size = memslot->npages * PAGE_SIZE;
12249ed24f4bSMarc Zyngier
12259ed24f4bSMarc Zyngier gpa_start = memslot->base_gfn << PAGE_SHIFT;
12269ed24f4bSMarc Zyngier
12279ed24f4bSMarc Zyngier uaddr_start = memslot->userspace_addr;
12289ed24f4bSMarc Zyngier uaddr_end = uaddr_start + size;
12299ed24f4bSMarc Zyngier
12309ed24f4bSMarc Zyngier /*
12319ed24f4bSMarc Zyngier * Pages belonging to memslots that don't have the same alignment
12329ed24f4bSMarc Zyngier * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
12339ed24f4bSMarc Zyngier * PMD/PUD entries, because we'll end up mapping the wrong pages.
12349ed24f4bSMarc Zyngier *
12359ed24f4bSMarc Zyngier * Consider a layout like the following:
12369ed24f4bSMarc Zyngier *
12379ed24f4bSMarc Zyngier * memslot->userspace_addr:
12389ed24f4bSMarc Zyngier * +-----+--------------------+--------------------+---+
12399ed24f4bSMarc Zyngier * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
12409ed24f4bSMarc Zyngier * +-----+--------------------+--------------------+---+
12419ed24f4bSMarc Zyngier *
12429f283614SSuzuki K Poulose * memslot->base_gfn << PAGE_SHIFT:
12439ed24f4bSMarc Zyngier * +---+--------------------+--------------------+-----+
12449ed24f4bSMarc Zyngier * |abc|def Stage-2 block | Stage-2 block |tvxyz|
12459ed24f4bSMarc Zyngier * +---+--------------------+--------------------+-----+
12469ed24f4bSMarc Zyngier *
12479ed24f4bSMarc Zyngier * If we create those stage-2 blocks, we'll end up with this incorrect
12489ed24f4bSMarc Zyngier * mapping:
12499ed24f4bSMarc Zyngier * d -> f
12509ed24f4bSMarc Zyngier * e -> g
12519ed24f4bSMarc Zyngier * f -> h
12529ed24f4bSMarc Zyngier */
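	/*
	 * Concrete illustration (4KiB pages, 2MiB PMD blocks): a memslot
	 * whose userspace_addr sits 0x1000 into a 2MiB region while its
	 * base IPA is 2MiB aligned fails the check below (0x1000 vs 0x0),
	 * so only page mappings are used for it.
	 */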
12539ed24f4bSMarc Zyngier if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
12549ed24f4bSMarc Zyngier return false;
12559ed24f4bSMarc Zyngier
12569ed24f4bSMarc Zyngier /*
12579ed24f4bSMarc Zyngier * Next, let's make sure we're not trying to map anything not covered
12589ed24f4bSMarc Zyngier * by the memslot. This means we have to prohibit block size mappings
12599ed24f4bSMarc Zyngier * for the beginning and end of a non-block aligned and non-block sized
12609ed24f4bSMarc Zyngier * memory slot (illustrated by the head and tail parts of the
12619ed24f4bSMarc Zyngier * userspace view above containing pages 'abcde' and 'xyz',
12629ed24f4bSMarc Zyngier * respectively).
12639ed24f4bSMarc Zyngier *
12649ed24f4bSMarc Zyngier * Note that it doesn't matter if we do the check using the
12659ed24f4bSMarc Zyngier * userspace_addr or the base_gfn, as both are equally aligned (per
12669ed24f4bSMarc Zyngier * the check above) and equally sized.
12679ed24f4bSMarc Zyngier */
12689ed24f4bSMarc Zyngier return (hva & ~(map_size - 1)) >= uaddr_start &&
12699ed24f4bSMarc Zyngier (hva & ~(map_size - 1)) + map_size <= uaddr_end;
12709ed24f4bSMarc Zyngier }
12719ed24f4bSMarc Zyngier
12720529c902SSuzuki K Poulose /*
12730529c902SSuzuki K Poulose * Check if the given hva is backed by a transparent huge page (THP) and
12740529c902SSuzuki K Poulose * whether it can be mapped using block mapping in stage2. If so, adjust
12750529c902SSuzuki K Poulose * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
12760529c902SSuzuki K Poulose * supported. This will need to be updated to support other THP sizes.
12770529c902SSuzuki K Poulose *
12780529c902SSuzuki K Poulose * Returns the size of the mapping.
12790529c902SSuzuki K Poulose */
1280e86fc1a3SMarc Zyngier static long
12816011cf68SMarc Zyngier transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
12820529c902SSuzuki K Poulose unsigned long hva, kvm_pfn_t *pfnp,
12830529c902SSuzuki K Poulose phys_addr_t *ipap)
12840529c902SSuzuki K Poulose {
12850529c902SSuzuki K Poulose kvm_pfn_t pfn = *pfnp;
12860529c902SSuzuki K Poulose
12870529c902SSuzuki K Poulose /*
12880529c902SSuzuki K Poulose * Make sure the adjustment is done only for THP pages. Also make
12890529c902SSuzuki K Poulose * sure that the HVA and IPA are sufficiently aligned and that the
12900529c902SSuzuki K Poulose * block map is contained within the memslot.
12910529c902SSuzuki K Poulose */
1292e86fc1a3SMarc Zyngier if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
1293e86fc1a3SMarc Zyngier int sz = get_user_mapping_size(kvm, hva);
1294e86fc1a3SMarc Zyngier
1295e86fc1a3SMarc Zyngier if (sz < 0)
1296e86fc1a3SMarc Zyngier return sz;
1297e86fc1a3SMarc Zyngier
1298e86fc1a3SMarc Zyngier if (sz < PMD_SIZE)
1299e86fc1a3SMarc Zyngier return PAGE_SIZE;
1300e86fc1a3SMarc Zyngier
13010529c902SSuzuki K Poulose /*
13020529c902SSuzuki K Poulose * The address we faulted on is backed by a transparent huge
13030529c902SSuzuki K Poulose * page. However, because we map the compound huge page and
13040529c902SSuzuki K Poulose * not the individual tail page, we need to transfer the
13050529c902SSuzuki K Poulose * refcount to the head page. We have to be careful that the
13060529c902SSuzuki K Poulose * THP doesn't start to split while we are adjusting the
13070529c902SSuzuki K Poulose * refcounts.
13080529c902SSuzuki K Poulose *
130920ec3ebdSChao Peng * We are sure this doesn't happen, because mmu_invalidate_retry
13100529c902SSuzuki K Poulose * was successful and we are holding the mmu_lock, so if this
13110529c902SSuzuki K Poulose * THP is trying to split, it will be blocked in the mmu
13120529c902SSuzuki K Poulose * notifier before touching any of the pages, specifically
13130529c902SSuzuki K Poulose * before being able to call __split_huge_page_refcount().
13140529c902SSuzuki K Poulose *
13150529c902SSuzuki K Poulose * We can therefore safely transfer the refcount from PG_tail
13160529c902SSuzuki K Poulose * to PG_head and switch the pfn from a tail page to the head
13170529c902SSuzuki K Poulose * page accordingly.
13180529c902SSuzuki K Poulose */
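		/*
		 * Illustration, assuming a 4KiB granule: PMD_SIZE is 2MiB
		 * and PTRS_PER_PMD is 512, so the IPA is rounded down to a
		 * 2MiB boundary and the pfn to a multiple of 512 pages,
		 * i.e. the head page of the THP.
		 */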
13190529c902SSuzuki K Poulose *ipap &= PMD_MASK;
13200529c902SSuzuki K Poulose kvm_release_pfn_clean(pfn);
13210529c902SSuzuki K Poulose pfn &= ~(PTRS_PER_PMD - 1);
13220fe49630SMarc Zyngier get_page(pfn_to_page(pfn));
13230529c902SSuzuki K Poulose *pfnp = pfn;
13240529c902SSuzuki K Poulose
13250529c902SSuzuki K Poulose return PMD_SIZE;
13260529c902SSuzuki K Poulose }
13270529c902SSuzuki K Poulose
13280529c902SSuzuki K Poulose /* Use page mapping if we cannot use block mapping. */
13290529c902SSuzuki K Poulose return PAGE_SIZE;
13300529c902SSuzuki K Poulose }
13310529c902SSuzuki K Poulose
13322aa53d68SKeqian Zhu static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
13332aa53d68SKeqian Zhu {
13342aa53d68SKeqian Zhu unsigned long pa;
13352aa53d68SKeqian Zhu
13362aa53d68SKeqian Zhu if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
13372aa53d68SKeqian Zhu return huge_page_shift(hstate_vma(vma));
13382aa53d68SKeqian Zhu
13392aa53d68SKeqian Zhu if (!(vma->vm_flags & VM_PFNMAP))
13402aa53d68SKeqian Zhu return PAGE_SHIFT;
13412aa53d68SKeqian Zhu
13422aa53d68SKeqian Zhu VM_BUG_ON(is_vm_hugetlb_page(vma));
13432aa53d68SKeqian Zhu
13442aa53d68SKeqian Zhu pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
13452aa53d68SKeqian Zhu
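	/*
	 * For VM_PFNMAP regions, a block mapping is only possible when hva
	 * and pa are congruent modulo the block size and the whole block
	 * fits in the VMA. For example (4KiB pages), hva 0x40200000 backed
	 * by pa 0x80200000 allows a 2MiB PMD mapping provided the
	 * surrounding 2MiB lies within [vm_start, vm_end].
	 */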
13462aa53d68SKeqian Zhu #ifndef __PAGETABLE_PMD_FOLDED
13472aa53d68SKeqian Zhu if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
13482aa53d68SKeqian Zhu ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
13492aa53d68SKeqian Zhu ALIGN(hva, PUD_SIZE) <= vma->vm_end)
13502aa53d68SKeqian Zhu return PUD_SHIFT;
13512aa53d68SKeqian Zhu #endif
13522aa53d68SKeqian Zhu
13532aa53d68SKeqian Zhu if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
13542aa53d68SKeqian Zhu ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
13552aa53d68SKeqian Zhu ALIGN(hva, PMD_SIZE) <= vma->vm_end)
13562aa53d68SKeqian Zhu return PMD_SHIFT;
13572aa53d68SKeqian Zhu
13582aa53d68SKeqian Zhu return PAGE_SHIFT;
13592aa53d68SKeqian Zhu }
13602aa53d68SKeqian Zhu
1361ea7fc1bbSSteven Price /*
1362ea7fc1bbSSteven Price * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
1363ea7fc1bbSSteven Price * able to see the page's tags and therefore they must be initialised first. If
1364ea7fc1bbSSteven Price * PG_mte_tagged is set, tags have already been initialised.
1365ea7fc1bbSSteven Price *
1366ea7fc1bbSSteven Price * The race in the test/set of the PG_mte_tagged flag is handled by:
1367ea7fc1bbSSteven Price * - preventing VM_SHARED mappings in a memslot with MTE, so that two VMs
1368ea7fc1bbSSteven Price * cannot race to sanitise the same page
1369ea7fc1bbSSteven Price * - mmap_lock protects between a VM faulting a page in and the VMM performing
1370ea7fc1bbSSteven Price * an mprotect() to add VM_MTE
1371ea7fc1bbSSteven Price */
13722dbf12aeSCatalin Marinas static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
1373ea7fc1bbSSteven Price unsigned long size)
1374ea7fc1bbSSteven Price {
1375ea7fc1bbSSteven Price unsigned long i, nr_pages = size >> PAGE_SHIFT;
13762dbf12aeSCatalin Marinas struct page *page = pfn_to_page(pfn);
1377ea7fc1bbSSteven Price
1378ea7fc1bbSSteven Price if (!kvm_has_mte(kvm))
13792dbf12aeSCatalin Marinas return;
1380ea7fc1bbSSteven Price
1381ea7fc1bbSSteven Price for (i = 0; i < nr_pages; i++, page++) {
1382d77e59a8SCatalin Marinas if (try_page_mte_tagging(page)) {
1383ea7fc1bbSSteven Price mte_clear_page_tags(page_address(page));
1384e059853dSCatalin Marinas set_page_mte_tagged(page);
1385ea7fc1bbSSteven Price }
1386ea7fc1bbSSteven Price }
1387ea7fc1bbSSteven Price }
1388ea7fc1bbSSteven Price
1389d89585fbSPeter Collingbourne static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
1390d89585fbSPeter Collingbourne {
1391d89585fbSPeter Collingbourne return vma->vm_flags & VM_MTE_ALLOWED;
1392ea7fc1bbSSteven Price }
1393ea7fc1bbSSteven Price
13949ed24f4bSMarc Zyngier static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
13959ed24f4bSMarc Zyngier struct kvm_memory_slot *memslot, unsigned long hva,
13969ed24f4bSMarc Zyngier unsigned long fault_status)
13979ed24f4bSMarc Zyngier {
1398ffd1b63aSWill Deacon int ret = 0;
13999ed24f4bSMarc Zyngier bool write_fault, writable, force_pte = false;
14008c2e8ac8SMarc Zyngier bool exec_fault, mte_allowed;
14016f745f1bSWill Deacon bool device = false;
14029ed24f4bSMarc Zyngier unsigned long mmu_seq;
14039ed24f4bSMarc Zyngier struct kvm *kvm = vcpu->kvm;
14049ed24f4bSMarc Zyngier struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
14059ed24f4bSMarc Zyngier struct vm_area_struct *vma;
14069ed24f4bSMarc Zyngier short vma_shift;
14076f745f1bSWill Deacon gfn_t gfn;
14089ed24f4bSMarc Zyngier kvm_pfn_t pfn;
14099ed24f4bSMarc Zyngier bool logging_active = memslot_is_logging(memslot);
14107d894834SYanan Wang unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
1411e86fc1a3SMarc Zyngier long vma_pagesize, fault_granule;
14126f745f1bSWill Deacon enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
14136f745f1bSWill Deacon struct kvm_pgtable *pgt;
14149ed24f4bSMarc Zyngier
14157d894834SYanan Wang fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
14169ed24f4bSMarc Zyngier write_fault = kvm_is_write_fault(vcpu);
1417c4ad98e4SMarc Zyngier exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
14189ed24f4bSMarc Zyngier VM_BUG_ON(write_fault && exec_fault);
14199ed24f4bSMarc Zyngier
1420b0803ba7SMarc Zyngier if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
14219ed24f4bSMarc Zyngier kvm_err("Unexpected L2 read permission error\n");
14229ed24f4bSMarc Zyngier return -EFAULT;
14239ed24f4bSMarc Zyngier }
14249ed24f4bSMarc Zyngier
14252aa53d68SKeqian Zhu /*
142613ec9308SDavid Matlack * Permission faults just need to update the existing leaf entry,
142713ec9308SDavid Matlack * and so normally don't require allocations from the memcache. The
142813ec9308SDavid Matlack * only exception to this is when dirty logging is enabled at runtime
142913ec9308SDavid Matlack * and a write fault needs to collapse a block entry into a table.
143013ec9308SDavid Matlack */
143113ec9308SDavid Matlack if (fault_status != ESR_ELx_FSC_PERM ||
143213ec9308SDavid Matlack (logging_active && write_fault)) {
143313ec9308SDavid Matlack ret = kvm_mmu_topup_memory_cache(memcache,
143413ec9308SDavid Matlack kvm_mmu_cache_min_pages(kvm));
143513ec9308SDavid Matlack if (ret)
143613ec9308SDavid Matlack return ret;
143713ec9308SDavid Matlack }
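	/*
	 * kvm_mmu_cache_min_pages() sizes the cache for the worst case:
	 * one page for each stage-2 table level that might have to be
	 * allocated on the way down to the new leaf entry.
	 */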
143813ec9308SDavid Matlack
143913ec9308SDavid Matlack /*
14402aa53d68SKeqian Zhu * Let's check if we will get back a huge page backed by hugetlbfs, or
14412aa53d68SKeqian Zhu * get block mapping for device MMIO region.
14422aa53d68SKeqian Zhu */
144389154dd5SMichel Lespinasse mmap_read_lock(current->mm);
144409eef83aSLiam Howlett vma = vma_lookup(current->mm, hva);
14459ed24f4bSMarc Zyngier if (unlikely(!vma)) {
14469ed24f4bSMarc Zyngier kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
144789154dd5SMichel Lespinasse mmap_read_unlock(current->mm);
14489ed24f4bSMarc Zyngier return -EFAULT;
14499ed24f4bSMarc Zyngier }
14509ed24f4bSMarc Zyngier
14512aa53d68SKeqian Zhu /*
14522aa53d68SKeqian Zhu * logging_active is guaranteed to never be true for VM_PFNMAP
14532aa53d68SKeqian Zhu * memslots.
14542aa53d68SKeqian Zhu */
14552aa53d68SKeqian Zhu if (logging_active) {
14569ed24f4bSMarc Zyngier force_pte = true;
1457523b3999SAlexandru Elisei vma_shift = PAGE_SHIFT;
14582aa53d68SKeqian Zhu } else {
14592aa53d68SKeqian Zhu vma_shift = get_vma_page_shift(vma, hva);
14609ed24f4bSMarc Zyngier }
14619ed24f4bSMarc Zyngier
14622f40c460SGavin Shan switch (vma_shift) {
1463faf00039SGavin Shan #ifndef __PAGETABLE_PMD_FOLDED
14642f40c460SGavin Shan case PUD_SHIFT:
14652f40c460SGavin Shan if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
14662f40c460SGavin Shan break;
14672f40c460SGavin Shan fallthrough;
1468faf00039SGavin Shan #endif
14692f40c460SGavin Shan case CONT_PMD_SHIFT:
1470523b3999SAlexandru Elisei vma_shift = PMD_SHIFT;
14712f40c460SGavin Shan fallthrough;
14722f40c460SGavin Shan case PMD_SHIFT:
14732f40c460SGavin Shan if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
14742f40c460SGavin Shan break;
14752f40c460SGavin Shan fallthrough;
14762f40c460SGavin Shan case CONT_PTE_SHIFT:
1477523b3999SAlexandru Elisei vma_shift = PAGE_SHIFT;
14782f40c460SGavin Shan force_pte = true;
14792f40c460SGavin Shan fallthrough;
14802f40c460SGavin Shan case PAGE_SHIFT:
14812f40c460SGavin Shan break;
14822f40c460SGavin Shan default:
14832f40c460SGavin Shan WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
1484523b3999SAlexandru Elisei }
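	/*
	 * Note that contiguous-hint sizes are never used at stage-2 here:
	 * e.g. with a 4KiB granule, a CONT_PMD-backed (32MiB) hugepage is
	 * demoted to 2MiB blocks and a CONT_PTE-backed (64KiB) one to
	 * single pages.
	 */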
1485523b3999SAlexandru Elisei
1486523b3999SAlexandru Elisei vma_pagesize = 1UL << vma_shift;
14876f745f1bSWill Deacon if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
1488523b3999SAlexandru Elisei fault_ipa &= ~(vma_pagesize - 1);
14896f745f1bSWill Deacon
14906f745f1bSWill Deacon gfn = fault_ipa >> PAGE_SHIFT;
14918c2e8ac8SMarc Zyngier mte_allowed = kvm_vma_mte_allowed(vma);
14928c2e8ac8SMarc Zyngier
14938c2e8ac8SMarc Zyngier /* Don't use the VMA after the unlock -- it may have vanished */
14948c2e8ac8SMarc Zyngier vma = NULL;
14959ed24f4bSMarc Zyngier
14966f745f1bSWill Deacon /*
149713ec9308SDavid Matlack * Read mmu_invalidate_seq so that KVM can detect if the results of
149813ec9308SDavid Matlack * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
149913ec9308SDavid Matlack * acquiring kvm->mmu_lock.
150010ba2d17SGavin Shan *
150113ec9308SDavid Matlack * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
150213ec9308SDavid Matlack * with the smp_wmb() in kvm_mmu_invalidate_end().
15039ed24f4bSMarc Zyngier */
150413ec9308SDavid Matlack mmu_seq = vcpu->kvm->mmu_invalidate_seq;
150513ec9308SDavid Matlack mmap_read_unlock(current->mm);
15069ed24f4bSMarc Zyngier
1507c8b88b33SPeter Xu pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
150810ba2d17SGavin Shan write_fault, &writable, NULL);
15099ed24f4bSMarc Zyngier if (pfn == KVM_PFN_ERR_HWPOISON) {
15109ed24f4bSMarc Zyngier kvm_send_hwpoison_signal(hva, vma_shift);
15119ed24f4bSMarc Zyngier return 0;
15129ed24f4bSMarc Zyngier }
15139ed24f4bSMarc Zyngier if (is_error_noslot_pfn(pfn))
15149ed24f4bSMarc Zyngier return -EFAULT;
15159ed24f4bSMarc Zyngier
15169ed24f4bSMarc Zyngier if (kvm_is_device_pfn(pfn)) {
15172aa53d68SKeqian Zhu /*
15182aa53d68SKeqian Zhu * If the page was identified as device early by looking at
15192aa53d68SKeqian Zhu * the VMA flags, vma_pagesize is already representing the
15202aa53d68SKeqian Zhu * largest quantity we can map. If instead it was mapped
15212aa53d68SKeqian Zhu * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
15222aa53d68SKeqian Zhu * and must not be upgraded.
15232aa53d68SKeqian Zhu *
15242aa53d68SKeqian Zhu * In both cases, we don't let transparent_hugepage_adjust()
15252aa53d68SKeqian Zhu * change things at the last minute.
15262aa53d68SKeqian Zhu */
15276f745f1bSWill Deacon device = true;
15286f745f1bSWill Deacon } else if (logging_active && !write_fault) {
15299ed24f4bSMarc Zyngier /*
15309ed24f4bSMarc Zyngier * Only actually map the page as writable if this was a write
15319ed24f4bSMarc Zyngier * fault.
15329ed24f4bSMarc Zyngier */
15339ed24f4bSMarc Zyngier writable = false;
15349ed24f4bSMarc Zyngier }
15359ed24f4bSMarc Zyngier
15366f745f1bSWill Deacon if (exec_fault && device)
15379ed24f4bSMarc Zyngier return -ENOEXEC;
15389ed24f4bSMarc Zyngier
1539f783ef1cSJing Zhang read_lock(&kvm->mmu_lock);
15406f745f1bSWill Deacon pgt = vcpu->arch.hw_mmu->pgt;
154120ec3ebdSChao Peng if (mmu_invalidate_retry(kvm, mmu_seq))
15429ed24f4bSMarc Zyngier goto out_unlock;
15439ed24f4bSMarc Zyngier
15449ed24f4bSMarc Zyngier /*
15450529c902SSuzuki K Poulose * If we are not forced to use page mapping, check if we are
15460529c902SSuzuki K Poulose * backed by a THP and thus use block mapping if possible.
15479ed24f4bSMarc Zyngier */
1548f2cc3273SMarc Zyngier if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
1549b0803ba7SMarc Zyngier if (fault_status == ESR_ELx_FSC_PERM &&
1550b0803ba7SMarc Zyngier fault_granule > PAGE_SIZE)
1551f2cc3273SMarc Zyngier vma_pagesize = fault_granule;
1552f2cc3273SMarc Zyngier else
1553f2cc3273SMarc Zyngier vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
1554f2cc3273SMarc Zyngier hva, &pfn,
1555f2cc3273SMarc Zyngier &fault_ipa);
1556e86fc1a3SMarc Zyngier
1557e86fc1a3SMarc Zyngier if (vma_pagesize < 0) {
1558e86fc1a3SMarc Zyngier ret = vma_pagesize;
1559e86fc1a3SMarc Zyngier goto out_unlock;
1560e86fc1a3SMarc Zyngier }
1561f2cc3273SMarc Zyngier }
15629f03db66SMarc Zyngier
1563b0803ba7SMarc Zyngier if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
1564d89585fbSPeter Collingbourne /* Check the VMM hasn't introduced a new disallowed VMA */
15658c2e8ac8SMarc Zyngier if (mte_allowed) {
15662dbf12aeSCatalin Marinas sanitise_mte_tags(kvm, pfn, vma_pagesize);
15672dbf12aeSCatalin Marinas } else {
15689f03db66SMarc Zyngier ret = -EFAULT;
15699f03db66SMarc Zyngier goto out_unlock;
15709f03db66SMarc Zyngier }
15712dbf12aeSCatalin Marinas }
15729f03db66SMarc Zyngier
1573509552e6SYanan Wang if (writable)
15746f745f1bSWill Deacon prot |= KVM_PGTABLE_PROT_W;
15759ed24f4bSMarc Zyngier
157625aa2869SYanan Wang if (exec_fault)
15776f745f1bSWill Deacon prot |= KVM_PGTABLE_PROT_X;
15786f745f1bSWill Deacon
15796f745f1bSWill Deacon if (device)
15806f745f1bSWill Deacon prot |= KVM_PGTABLE_PROT_DEVICE;
15816f745f1bSWill Deacon else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
15826f745f1bSWill Deacon prot |= KVM_PGTABLE_PROT_X;
15836f745f1bSWill Deacon
15847d894834SYanan Wang /*
15857d894834SYanan Wang * If this is a FSC_PERM fault, we only need to relax
15867d894834SYanan Wang * permissions when vma_pagesize equals fault_granule. Otherwise,
15877d894834SYanan Wang * kvm_pgtable_stage2_map() should be called to change the block size.
15887d894834SYanan Wang */
1589b0803ba7SMarc Zyngier if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
15906f745f1bSWill Deacon ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
15911577cb58SOliver Upton else
15926f745f1bSWill Deacon ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
15936f745f1bSWill Deacon __pfn_to_phys(pfn), prot,
1594ddcadb29SOliver Upton memcache,
1595ddcadb29SOliver Upton KVM_PGTABLE_WALK_HANDLE_FAULT |
1596ddcadb29SOliver Upton KVM_PGTABLE_WALK_SHARED);
15979ed24f4bSMarc Zyngier
1598509552e6SYanan Wang /* Mark the page dirty only if the fault is handled successfully */
1599509552e6SYanan Wang if (writable && !ret) {
1600509552e6SYanan Wang kvm_set_pfn_dirty(pfn);
160110ba2d17SGavin Shan mark_page_dirty_in_slot(kvm, memslot, gfn);
1602509552e6SYanan Wang }
1603509552e6SYanan Wang
16049ed24f4bSMarc Zyngier out_unlock:
1605f783ef1cSJing Zhang read_unlock(&kvm->mmu_lock);
16069ed24f4bSMarc Zyngier kvm_release_pfn_clean(pfn);
1607509552e6SYanan Wang return ret != -EAGAIN ? ret : 0;
16089ed24f4bSMarc Zyngier }
16099ed24f4bSMarc Zyngier
1610ee8efad7SWill Deacon /* Resolve the access fault by making the page young again. */
16119ed24f4bSMarc Zyngier static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
16129ed24f4bSMarc Zyngier {
16139a7ad19aSOliver Upton kvm_pte_t pte;
1614ee8efad7SWill Deacon struct kvm_s2_mmu *mmu;
16159ed24f4bSMarc Zyngier
16169ed24f4bSMarc Zyngier trace_kvm_access_fault(fault_ipa);
16179ed24f4bSMarc Zyngier
1618fc61f554SOliver Upton read_lock(&vcpu->kvm->mmu_lock);
1619ee8efad7SWill Deacon mmu = vcpu->arch.hw_mmu;
16209a7ad19aSOliver Upton pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
1621fc61f554SOliver Upton read_unlock(&vcpu->kvm->mmu_lock);
1622ee8efad7SWill Deacon
16239a7ad19aSOliver Upton if (kvm_pte_valid(pte))
16249a7ad19aSOliver Upton kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
16259ed24f4bSMarc Zyngier }
16269ed24f4bSMarc Zyngier
16279ed24f4bSMarc Zyngier /**
16289ed24f4bSMarc Zyngier * kvm_handle_guest_abort - handles all 2nd stage aborts
16299ed24f4bSMarc Zyngier * @vcpu: the VCPU pointer
16309ed24f4bSMarc Zyngier *
16319ed24f4bSMarc Zyngier * Any abort that gets to the host is almost guaranteed to be caused by a
16329ed24f4bSMarc Zyngier * missing stage-2 translation table entry, which means either that the
16339ed24f4bSMarc Zyngier * guest simply needs more memory (and we must allocate an appropriate page) or
16349ed24f4bSMarc Zyngier * that the guest tried to access I/O memory, which is emulated by user
16359ed24f4bSMarc Zyngier * space. The distinction is based on the IPA causing the fault and whether this
16369ed24f4bSMarc Zyngier * memory region has been registered as standard RAM by user space.
16379ed24f4bSMarc Zyngier */
163874cc7e0cSTianjia Zhang int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
16399ed24f4bSMarc Zyngier {
16409ed24f4bSMarc Zyngier unsigned long fault_status;
16419ed24f4bSMarc Zyngier phys_addr_t fault_ipa;
16429ed24f4bSMarc Zyngier struct kvm_memory_slot *memslot;
16439ed24f4bSMarc Zyngier unsigned long hva;
16449ed24f4bSMarc Zyngier bool is_iabt, write_fault, writable;
16459ed24f4bSMarc Zyngier gfn_t gfn;
16469ed24f4bSMarc Zyngier int ret, idx;
16479ed24f4bSMarc Zyngier
16489ed24f4bSMarc Zyngier fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
16499ed24f4bSMarc Zyngier
16509ed24f4bSMarc Zyngier fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
16519ed24f4bSMarc Zyngier is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
16529ed24f4bSMarc Zyngier
1653b0803ba7SMarc Zyngier if (fault_status == ESR_ELx_FSC_FAULT) {
165485ea6b1eSMarc Zyngier /* Beyond sanitised PARange (which is the IPA limit) */
165585ea6b1eSMarc Zyngier if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
165685ea6b1eSMarc Zyngier kvm_inject_size_fault(vcpu);
165785ea6b1eSMarc Zyngier return 1;
165885ea6b1eSMarc Zyngier }
165985ea6b1eSMarc Zyngier
166085ea6b1eSMarc Zyngier /* Falls between the IPA range and the PARange? */
166185ea6b1eSMarc Zyngier if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
166285ea6b1eSMarc Zyngier fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
166385ea6b1eSMarc Zyngier
166485ea6b1eSMarc Zyngier if (is_iabt)
166585ea6b1eSMarc Zyngier kvm_inject_pabt(vcpu, fault_ipa);
166685ea6b1eSMarc Zyngier else
166785ea6b1eSMarc Zyngier kvm_inject_dabt(vcpu, fault_ipa);
166885ea6b1eSMarc Zyngier return 1;
166985ea6b1eSMarc Zyngier }
167085ea6b1eSMarc Zyngier }
167185ea6b1eSMarc Zyngier
16729ed24f4bSMarc Zyngier /* Synchronous External Abort? */
1673c9a636f2SWill Deacon if (kvm_vcpu_abt_issea(vcpu)) {
16749ed24f4bSMarc Zyngier /*
16759ed24f4bSMarc Zyngier * For RAS the host kernel may handle this abort.
16769ed24f4bSMarc Zyngier * There is no need to pass the error into the guest.
16779ed24f4bSMarc Zyngier */
167884b951a8SWill Deacon if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
16799ed24f4bSMarc Zyngier kvm_inject_vabt(vcpu);
168084b951a8SWill Deacon
16819ed24f4bSMarc Zyngier return 1;
16829ed24f4bSMarc Zyngier }
16839ed24f4bSMarc Zyngier
16843a949f4cSGavin Shan trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
16859ed24f4bSMarc Zyngier kvm_vcpu_get_hfar(vcpu), fault_ipa);
16869ed24f4bSMarc Zyngier
16879ed24f4bSMarc Zyngier /* Check the stage-2 fault is a translation, permission or access fault */
1688b0803ba7SMarc Zyngier if (fault_status != ESR_ELx_FSC_FAULT &&
1689b0803ba7SMarc Zyngier fault_status != ESR_ELx_FSC_PERM &&
1690b0803ba7SMarc Zyngier fault_status != ESR_ELx_FSC_ACCESS) {
16919ed24f4bSMarc Zyngier kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
16929ed24f4bSMarc Zyngier kvm_vcpu_trap_get_class(vcpu),
16939ed24f4bSMarc Zyngier (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
16943a949f4cSGavin Shan (unsigned long)kvm_vcpu_get_esr(vcpu));
16959ed24f4bSMarc Zyngier return -EFAULT;
16969ed24f4bSMarc Zyngier }
16979ed24f4bSMarc Zyngier
16989ed24f4bSMarc Zyngier idx = srcu_read_lock(&vcpu->kvm->srcu);
16999ed24f4bSMarc Zyngier
17009ed24f4bSMarc Zyngier gfn = fault_ipa >> PAGE_SHIFT;
17019ed24f4bSMarc Zyngier memslot = gfn_to_memslot(vcpu->kvm, gfn);
17029ed24f4bSMarc Zyngier hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
17039ed24f4bSMarc Zyngier write_fault = kvm_is_write_fault(vcpu);
17049ed24f4bSMarc Zyngier if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
1705022c8328SWill Deacon /*
1706022c8328SWill Deacon * The guest has put either its instructions or its page-tables
1707022c8328SWill Deacon * somewhere it shouldn't have. Userspace won't be able to do
1708022c8328SWill Deacon * anything about this (there's no syndrome for a start), so
1709022c8328SWill Deacon * re-inject the abort back into the guest.
1710022c8328SWill Deacon */
17119ed24f4bSMarc Zyngier if (is_iabt) {
17129ed24f4bSMarc Zyngier ret = -ENOEXEC;
17139ed24f4bSMarc Zyngier goto out;
17149ed24f4bSMarc Zyngier }
17159ed24f4bSMarc Zyngier
1716c4ad98e4SMarc Zyngier if (kvm_vcpu_abt_iss1tw(vcpu)) {
1717022c8328SWill Deacon kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
1718022c8328SWill Deacon ret = 1;
1719022c8328SWill Deacon goto out_unlock;
1720022c8328SWill Deacon }
1721022c8328SWill Deacon
17229ed24f4bSMarc Zyngier /*
17239ed24f4bSMarc Zyngier * Check for a cache maintenance operation. Since we
17249ed24f4bSMarc Zyngier * ended-up here, we know it is outside of any memory
17259ed24f4bSMarc Zyngier * slot. But we can't find out if that is for a device,
17269ed24f4bSMarc Zyngier * or if the guest is just being stupid. The only thing
17279ed24f4bSMarc Zyngier * we know for sure is that this range cannot be cached.
17289ed24f4bSMarc Zyngier *
17299ed24f4bSMarc Zyngier * So let's assume that the guest is just being
17309ed24f4bSMarc Zyngier * cautious, and skip the instruction.
17319ed24f4bSMarc Zyngier */
173254dc0d24SWill Deacon if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
1733cdb5e02eSMarc Zyngier kvm_incr_pc(vcpu);
17349ed24f4bSMarc Zyngier ret = 1;
17359ed24f4bSMarc Zyngier goto out_unlock;
17369ed24f4bSMarc Zyngier }
17379ed24f4bSMarc Zyngier
17389ed24f4bSMarc Zyngier /*
17399ed24f4bSMarc Zyngier * The IPA is reported as [MAX:12], so we need to
17409ed24f4bSMarc Zyngier * complement it with the bottom 12 bits from the
17419ed24f4bSMarc Zyngier * faulting VA. This is always 12 bits, irrespective
17429ed24f4bSMarc Zyngier * of the page size.
17439ed24f4bSMarc Zyngier */
17449ed24f4bSMarc Zyngier fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
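		/*
		 * For example, an abort reported with a page-aligned IPA of
		 * 0x8001000 and HFAR low bits of 0xb3c is emulated below as
		 * an MMIO access at 0x8001b3c.
		 */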
174574cc7e0cSTianjia Zhang ret = io_mem_abort(vcpu, fault_ipa);
17469ed24f4bSMarc Zyngier goto out_unlock;
17479ed24f4bSMarc Zyngier }
17489ed24f4bSMarc Zyngier
17499ed24f4bSMarc Zyngier /* Userspace should not be able to register out-of-bounds IPAs */
17509ed24f4bSMarc Zyngier VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
17519ed24f4bSMarc Zyngier
1752b0803ba7SMarc Zyngier if (fault_status == ESR_ELx_FSC_ACCESS) {
17539ed24f4bSMarc Zyngier handle_access_fault(vcpu, fault_ipa);
17549ed24f4bSMarc Zyngier ret = 1;
17559ed24f4bSMarc Zyngier goto out_unlock;
17569ed24f4bSMarc Zyngier }
17579ed24f4bSMarc Zyngier
17589ed24f4bSMarc Zyngier ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
17599ed24f4bSMarc Zyngier if (ret == 0)
17609ed24f4bSMarc Zyngier ret = 1;
17619ed24f4bSMarc Zyngier out:
17629ed24f4bSMarc Zyngier if (ret == -ENOEXEC) {
17639ed24f4bSMarc Zyngier kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
17649ed24f4bSMarc Zyngier ret = 1;
17659ed24f4bSMarc Zyngier }
17669ed24f4bSMarc Zyngier out_unlock:
17679ed24f4bSMarc Zyngier srcu_read_unlock(&vcpu->kvm->srcu, idx);
17689ed24f4bSMarc Zyngier return ret;
17699ed24f4bSMarc Zyngier }
17709ed24f4bSMarc Zyngier
1771cd4c7183SSean Christopherson bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
17729ed24f4bSMarc Zyngier {
1773063deeb1SWill Deacon if (!kvm->arch.mmu.pgt)
1774fcb82839Skernel test robot return false;
17759ed24f4bSMarc Zyngier
1776cd4c7183SSean Christopherson __unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
1777cd4c7183SSean Christopherson (range->end - range->start) << PAGE_SHIFT,
1778cd4c7183SSean Christopherson range->may_block);
1779cd4c7183SSean Christopherson
1780fcb82839Skernel test robot return false;
17819ed24f4bSMarc Zyngier }
17829ed24f4bSMarc Zyngier
1783cd4c7183SSean Christopherson bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
17849ed24f4bSMarc Zyngier {
17853e1efe2bSSean Christopherson kvm_pfn_t pfn = pte_pfn(range->arg.pte);
17869ed24f4bSMarc Zyngier
1787e9edb17aSWill Deacon if (!kvm->arch.mmu.pgt)
1788fcb82839Skernel test robot return false;
17899ed24f4bSMarc Zyngier
1790cd4c7183SSean Christopherson WARN_ON(range->end - range->start != 1);
1791cd4c7183SSean Christopherson
17922dbf12aeSCatalin Marinas /*
17932dbf12aeSCatalin Marinas * If the page isn't tagged, defer to user_mem_abort() for sanitising
17942dbf12aeSCatalin Marinas * the MTE tags. The S2 pte should have been unmapped by
17952dbf12aeSCatalin Marinas * mmu_notifier_invalidate_range_end().
17962dbf12aeSCatalin Marinas */
17972dbf12aeSCatalin Marinas if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
1798ea7fc1bbSSteven Price return false;
1799ea7fc1bbSSteven Price
18009ed24f4bSMarc Zyngier /*
180125aa2869SYanan Wang * We've moved a page around, probably through CoW, so let's treat
180225aa2869SYanan Wang * it just like a translation fault and the map handler will clean
180325aa2869SYanan Wang * the cache to the PoC.
180425aa2869SYanan Wang *
1805cd4c7183SSean Christopherson * The MMU notifiers will have unmapped a huge PMD before calling
1806cd4c7183SSean Christopherson * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
1807cd4c7183SSean Christopherson * therefore we never need to clear out a huge PMD through this
1808cd4c7183SSean Christopherson * calling path and a memcache is not required.
1809cd4c7183SSean Christopherson */
1810cd4c7183SSean Christopherson kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
1811cd4c7183SSean Christopherson PAGE_SIZE, __pfn_to_phys(pfn),
18121577cb58SOliver Upton KVM_PGTABLE_PROT_R, NULL, 0);
1813cd4c7183SSean Christopherson
1814fcb82839Skernel test robot return false;
18159ed24f4bSMarc Zyngier }
18169ed24f4bSMarc Zyngier
1817cd4c7183SSean Christopherson bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
18189ed24f4bSMarc Zyngier {
1819cd4c7183SSean Christopherson u64 size = (range->end - range->start) << PAGE_SHIFT;
1820cd4c7183SSean Christopherson
1821cd4c7183SSean Christopherson if (!kvm->arch.mmu.pgt)
1822fcb82839Skernel test robot return false;
18239ed24f4bSMarc Zyngier
1824df6556adSOliver Upton return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
1825df6556adSOliver Upton range->start << PAGE_SHIFT,
1826df6556adSOliver Upton size, true);
18279ed24f4bSMarc Zyngier }
18289ed24f4bSMarc Zyngier
1829cd4c7183SSean Christopherson bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
18309ed24f4bSMarc Zyngier {
1831df6556adSOliver Upton u64 size = (range->end - range->start) << PAGE_SHIFT;
1832df6556adSOliver Upton
1833063deeb1SWill Deacon if (!kvm->arch.mmu.pgt)
1834fcb82839Skernel test robot return false;
1835501b9185SSean Christopherson
1836df6556adSOliver Upton return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
1837df6556adSOliver Upton range->start << PAGE_SHIFT,
1838df6556adSOliver Upton size, false);
18399ed24f4bSMarc Zyngier }
18409ed24f4bSMarc Zyngier
18419ed24f4bSMarc Zyngier phys_addr_t kvm_mmu_get_httbr(void)
18429ed24f4bSMarc Zyngier {
18430f9d09b8SWill Deacon return __pa(hyp_pgtable->pgd);
18449ed24f4bSMarc Zyngier }
18459ed24f4bSMarc Zyngier
18469ed24f4bSMarc Zyngier phys_addr_t kvm_get_idmap_vector(void)
18479ed24f4bSMarc Zyngier {
18489ed24f4bSMarc Zyngier return hyp_idmap_vector;
18499ed24f4bSMarc Zyngier }
18509ed24f4bSMarc Zyngier
18510f9d09b8SWill Deacon static int kvm_map_idmap_text(void)
18529ed24f4bSMarc Zyngier {
18530f9d09b8SWill Deacon unsigned long size = hyp_idmap_end - hyp_idmap_start;
18540f9d09b8SWill Deacon int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
18559ed24f4bSMarc Zyngier PAGE_HYP_EXEC);
18569ed24f4bSMarc Zyngier if (err)
18579ed24f4bSMarc Zyngier kvm_err("Failed to idmap %lx-%lx\n",
18589ed24f4bSMarc Zyngier hyp_idmap_start, hyp_idmap_end);
18599ed24f4bSMarc Zyngier
18609ed24f4bSMarc Zyngier return err;
18619ed24f4bSMarc Zyngier }
18629ed24f4bSMarc Zyngier
18637aef0cbcSQuentin Perret static void *kvm_hyp_zalloc_page(void *arg)
18647aef0cbcSQuentin Perret {
18657aef0cbcSQuentin Perret return (void *)get_zeroed_page(GFP_KERNEL);
18667aef0cbcSQuentin Perret }
18677aef0cbcSQuentin Perret
18687aef0cbcSQuentin Perret static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
18697aef0cbcSQuentin Perret .zalloc_page = kvm_hyp_zalloc_page,
18707aef0cbcSQuentin Perret .get_page = kvm_host_get_page,
18717aef0cbcSQuentin Perret .put_page = kvm_host_put_page,
18727aef0cbcSQuentin Perret .phys_to_virt = kvm_host_va,
18737aef0cbcSQuentin Perret .virt_to_phys = kvm_host_pa,
18747aef0cbcSQuentin Perret };
18757aef0cbcSQuentin Perret
18768d20bd63SSean Christopherson int __init kvm_mmu_init(u32 *hyp_va_bits)
18779ed24f4bSMarc Zyngier {
18789ed24f4bSMarc Zyngier int err;
1879579d7ebeSRyan Roberts u32 idmap_bits;
1880579d7ebeSRyan Roberts u32 kernel_bits;
18819ed24f4bSMarc Zyngier
18820a78791cSAndrew Scull hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
18839ed24f4bSMarc Zyngier hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
18840a78791cSAndrew Scull hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
18859ed24f4bSMarc Zyngier hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
18860a78791cSAndrew Scull hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
18879ed24f4bSMarc Zyngier
18889ed24f4bSMarc Zyngier /*
18899ed24f4bSMarc Zyngier * We rely on the linker script to ensure at build time that the HYP
18909ed24f4bSMarc Zyngier * init code does not cross a page boundary.
18919ed24f4bSMarc Zyngier */
18929ed24f4bSMarc Zyngier BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
18939ed24f4bSMarc Zyngier
1894579d7ebeSRyan Roberts /*
1895579d7ebeSRyan Roberts * The ID map may be configured to use an extended virtual address
1896579d7ebeSRyan Roberts * range. This is only the case if system RAM is out of range for the
1897579d7ebeSRyan Roberts * currently configured page size and VA_BITS_MIN, in which case we will
1898579d7ebeSRyan Roberts * also need the extended virtual range for the HYP ID map, or we won't
1899579d7ebeSRyan Roberts * be able to enable the EL2 MMU.
1900579d7ebeSRyan Roberts *
1901579d7ebeSRyan Roberts * However, in some cases the ID map may be configured for fewer than
1902579d7ebeSRyan Roberts * the number of VA bits used by the regular kernel stage 1. This
1903579d7ebeSRyan Roberts * happens when VA_BITS=52 and the kernel image is placed in PA space
1904579d7ebeSRyan Roberts * below 48 bits.
1905579d7ebeSRyan Roberts *
1906579d7ebeSRyan Roberts * At EL2, there is only one TTBR register, and we can't switch between
1907579d7ebeSRyan Roberts * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
1908579d7ebeSRyan Roberts * line: we need to use the extended range with *both* our translation
1909579d7ebeSRyan Roberts * tables.
1910579d7ebeSRyan Roberts *
1911579d7ebeSRyan Roberts * So use the maximum of the idmap VA bits and the regular kernel stage
1912579d7ebeSRyan Roberts * 1 VA bits to assure that the hypervisor can both ID map its code page
1913579d7ebeSRyan Roberts * and map any kernel memory.
1914579d7ebeSRyan Roberts */
1915579d7ebeSRyan Roberts idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
1916579d7ebeSRyan Roberts kernel_bits = vabits_actual;
1917579d7ebeSRyan Roberts *hyp_va_bits = max(idmap_bits, kernel_bits);
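	/*
	 * For instance, with the common T0SZ of 16 the ID map uses
	 * 64 - 16 = 48 VA bits; combined with a 48-bit kernel VA
	 * configuration, this selects 48-bit hypervisor VAs.
	 */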
1918579d7ebeSRyan Roberts
1919bfa79a80SQuentin Perret kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
19209ed24f4bSMarc Zyngier kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
19219ed24f4bSMarc Zyngier kvm_debug("HYP VA range: %lx:%lx\n",
19229ed24f4bSMarc Zyngier kern_hyp_va(PAGE_OFFSET),
19239ed24f4bSMarc Zyngier kern_hyp_va((unsigned long)high_memory - 1));
19249ed24f4bSMarc Zyngier
19259ed24f4bSMarc Zyngier if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
19269ed24f4bSMarc Zyngier hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
19279ed24f4bSMarc Zyngier hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
19289ed24f4bSMarc Zyngier /*
19299ed24f4bSMarc Zyngier * The idmap page is intersecting with the VA space,
19309ed24f4bSMarc Zyngier * it is not safe to continue further.
19319ed24f4bSMarc Zyngier */
19329ed24f4bSMarc Zyngier kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
19339ed24f4bSMarc Zyngier err = -EINVAL;
19349ed24f4bSMarc Zyngier goto out;
19359ed24f4bSMarc Zyngier }
19369ed24f4bSMarc Zyngier
19370f9d09b8SWill Deacon hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
19380f9d09b8SWill Deacon if (!hyp_pgtable) {
19390f9d09b8SWill Deacon kvm_err("Hyp mode page-table not allocated\n");
19409ed24f4bSMarc Zyngier err = -ENOMEM;
19419ed24f4bSMarc Zyngier goto out;
19429ed24f4bSMarc Zyngier }
19439ed24f4bSMarc Zyngier
1944bfa79a80SQuentin Perret err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops);
19459ed24f4bSMarc Zyngier if (err)
19460f9d09b8SWill Deacon goto out_free_pgtable;
19479ed24f4bSMarc Zyngier
19480f9d09b8SWill Deacon err = kvm_map_idmap_text();
19499ed24f4bSMarc Zyngier if (err)
19500f9d09b8SWill Deacon goto out_destroy_pgtable;
19519ed24f4bSMarc Zyngier
19529ed24f4bSMarc Zyngier io_map_base = hyp_idmap_start;
19539ed24f4bSMarc Zyngier return 0;
19540f9d09b8SWill Deacon
19550f9d09b8SWill Deacon out_destroy_pgtable:
19560f9d09b8SWill Deacon kvm_pgtable_hyp_destroy(hyp_pgtable);
19570f9d09b8SWill Deacon out_free_pgtable:
19580f9d09b8SWill Deacon kfree(hyp_pgtable);
19590f9d09b8SWill Deacon hyp_pgtable = NULL;
19609ed24f4bSMarc Zyngier out:
19619ed24f4bSMarc Zyngier return err;
19629ed24f4bSMarc Zyngier }
19639ed24f4bSMarc Zyngier
19649ed24f4bSMarc Zyngier void kvm_arch_commit_memory_region(struct kvm *kvm,
19659ed24f4bSMarc Zyngier struct kvm_memory_slot *old,
19669ed24f4bSMarc Zyngier const struct kvm_memory_slot *new,
19679ed24f4bSMarc Zyngier enum kvm_mr_change change)
19689ed24f4bSMarc Zyngier {
19696bd92b9dSRicardo Koller bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
19706bd92b9dSRicardo Koller
19719ed24f4bSMarc Zyngier /*
19729ed24f4bSMarc Zyngier * At this point memslot has been committed and there is an
1973656012c7SFuad Tabba * allocated dirty_bitmap[]; dirty pages will be tracked while the
19749ed24f4bSMarc Zyngier * memory slot is write protected.
19759ed24f4bSMarc Zyngier */
19766bd92b9dSRicardo Koller if (log_dirty_pages) {
19776bd92b9dSRicardo Koller
19786bd92b9dSRicardo Koller if (change == KVM_MR_DELETE)
19796bd92b9dSRicardo Koller return;
19806bd92b9dSRicardo Koller
1981c862626eSKeqian Zhu /*
1982e7bf7a49SRicardo Koller * Huge and normal pages are write-protected and split
1983e7bf7a49SRicardo Koller * in either of these two cases:
19846bd92b9dSRicardo Koller *
19856bd92b9dSRicardo Koller * 1. with initial-all-set: gradually with CLEAR ioctls,
1986c862626eSKeqian Zhu */
19876bd92b9dSRicardo Koller if (kvm_dirty_log_manual_protect_and_init_set(kvm))
19886bd92b9dSRicardo Koller return;
19896bd92b9dSRicardo Koller /*
19906bd92b9dSRicardo Koller * or
19916bd92b9dSRicardo Koller * 2. without initial-all-set: all in one shot when
19926bd92b9dSRicardo Koller * enabling dirty logging.
19936bd92b9dSRicardo Koller */
1994509c594cSSean Christopherson kvm_mmu_wp_memory_region(kvm, new->id);
1995e7bf7a49SRicardo Koller kvm_mmu_split_memory_region(kvm, new->id);
1996e7bf7a49SRicardo Koller } else {
1997e7bf7a49SRicardo Koller /*
1998e7bf7a49SRicardo Koller * Free any leftovers from the eager page splitting cache. Do
1999e7bf7a49SRicardo Koller * this when deleting, moving, disabling dirty logging, or
2000e7bf7a49SRicardo Koller * creating the memslot (a nop). Doing it for deletes makes
2001e7bf7a49SRicardo Koller * sure we don't leak memory, and there's no need to keep the
2002e7bf7a49SRicardo Koller * cache around for any of the other cases.
2003e7bf7a49SRicardo Koller */
2004e7bf7a49SRicardo Koller kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
20059ed24f4bSMarc Zyngier }
2006c862626eSKeqian Zhu }
20079ed24f4bSMarc Zyngier
20089ed24f4bSMarc Zyngier int kvm_arch_prepare_memory_region(struct kvm *kvm,
2009537a17b3SSean Christopherson const struct kvm_memory_slot *old,
2010537a17b3SSean Christopherson struct kvm_memory_slot *new,
20119ed24f4bSMarc Zyngier enum kvm_mr_change change)
20129ed24f4bSMarc Zyngier {
2013509c594cSSean Christopherson hva_t hva, reg_end;
20149ed24f4bSMarc Zyngier int ret = 0;
20159ed24f4bSMarc Zyngier
20169ed24f4bSMarc Zyngier if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
20179ed24f4bSMarc Zyngier change != KVM_MR_FLAGS_ONLY)
20189ed24f4bSMarc Zyngier return 0;
20199ed24f4bSMarc Zyngier
20209ed24f4bSMarc Zyngier /*
20219ed24f4bSMarc Zyngier * Prevent userspace from creating a memory region outside of the IPA
20229ed24f4bSMarc Zyngier * space addressable by the guest.
20239ed24f4bSMarc Zyngier */
2024537a17b3SSean Christopherson if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
20259ed24f4bSMarc Zyngier return -EFAULT;
20269ed24f4bSMarc Zyngier
2027509c594cSSean Christopherson hva = new->userspace_addr;
2028509c594cSSean Christopherson reg_end = hva + (new->npages << PAGE_SHIFT);
2029509c594cSSean Christopherson
203089154dd5SMichel Lespinasse mmap_read_lock(current->mm);
20319ed24f4bSMarc Zyngier /*
20329ed24f4bSMarc Zyngier * A memory region could potentially cover multiple VMAs, and any holes
2033fd6f17baSKeqian Zhu * between them, so iterate over all of them.
20349ed24f4bSMarc Zyngier *
20359ed24f4bSMarc Zyngier * +--------------------------------------------+
20369ed24f4bSMarc Zyngier * +---------------+----------------+ +----------------+
20379ed24f4bSMarc Zyngier * | : VMA 1 | VMA 2 | | VMA 3 : |
20389ed24f4bSMarc Zyngier * +---------------+----------------+ +----------------+
20399ed24f4bSMarc Zyngier * | memory region |
20409ed24f4bSMarc Zyngier * +--------------------------------------------+
20419ed24f4bSMarc Zyngier */
20429ed24f4bSMarc Zyngier do {
2043c728fd4cSGavin Shan struct vm_area_struct *vma;
20449ed24f4bSMarc Zyngier
2045c728fd4cSGavin Shan vma = find_vma_intersection(current->mm, hva, reg_end);
2046c728fd4cSGavin Shan if (!vma)
20479ed24f4bSMarc Zyngier break;
20489ed24f4bSMarc Zyngier
2049d89585fbSPeter Collingbourne if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
20506e6a8ef0SQuentin Perret ret = -EINVAL;
20516e6a8ef0SQuentin Perret break;
20526e6a8ef0SQuentin Perret }
2053ea7fc1bbSSteven Price
20549ed24f4bSMarc Zyngier if (vma->vm_flags & VM_PFNMAP) {
20559ed24f4bSMarc Zyngier /* IO region dirty page logging not allowed */
2056537a17b3SSean Christopherson if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
20579ed24f4bSMarc Zyngier ret = -EINVAL;
20589ed24f4bSMarc Zyngier break;
20599ed24f4bSMarc Zyngier }
2060fd6f17baSKeqian Zhu }
2061fd6f17baSKeqian Zhu hva = min(reg_end, vma->vm_end);
20629ed24f4bSMarc Zyngier } while (hva < reg_end);
20639ed24f4bSMarc Zyngier
206489154dd5SMichel Lespinasse mmap_read_unlock(current->mm);
20659ed24f4bSMarc Zyngier return ret;
20669ed24f4bSMarc Zyngier }
20679ed24f4bSMarc Zyngier
20689ed24f4bSMarc Zyngier void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
20699ed24f4bSMarc Zyngier {
20709ed24f4bSMarc Zyngier }
20719ed24f4bSMarc Zyngier
20729ed24f4bSMarc Zyngier void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
20739ed24f4bSMarc Zyngier {
20749ed24f4bSMarc Zyngier }
20759ed24f4bSMarc Zyngier
20769ed24f4bSMarc Zyngier void kvm_arch_flush_shadow_all(struct kvm *kvm)
20779ed24f4bSMarc Zyngier {
2078ce2b6022SRicardo Koller kvm_uninit_stage2_mmu(kvm);
20799ed24f4bSMarc Zyngier }
20809ed24f4bSMarc Zyngier
20819ed24f4bSMarc Zyngier void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
20829ed24f4bSMarc Zyngier struct kvm_memory_slot *slot)
20839ed24f4bSMarc Zyngier {
20849ed24f4bSMarc Zyngier gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
20859ed24f4bSMarc Zyngier phys_addr_t size = slot->npages << PAGE_SHIFT;
20869ed24f4bSMarc Zyngier
2087fcc5bf89SJing Zhang write_lock(&kvm->mmu_lock);
2088a0e50aa3SChristoffer Dall unmap_stage2_range(&kvm->arch.mmu, gpa, size);
2089fcc5bf89SJing Zhang write_unlock(&kvm->mmu_lock);
20909ed24f4bSMarc Zyngier }
20919ed24f4bSMarc Zyngier
20929ed24f4bSMarc Zyngier /*
20939ed24f4bSMarc Zyngier * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
20949ed24f4bSMarc Zyngier *
20959ed24f4bSMarc Zyngier * Main problems:
20969ed24f4bSMarc Zyngier * - S/W ops are local to a CPU (not broadcast)
20979ed24f4bSMarc Zyngier * - We have line migration behind our back (speculation)
20989ed24f4bSMarc Zyngier * - System caches don't support S/W at all (damn!)
20999ed24f4bSMarc Zyngier *
21009ed24f4bSMarc Zyngier * In the face of the above, the best we can do is to try and convert
21019ed24f4bSMarc Zyngier * S/W ops to VA ops. Because the guest is not allowed to infer the
21029ed24f4bSMarc Zyngier * S/W to PA mapping, it can only use S/W to nuke the whole cache,
21039ed24f4bSMarc Zyngier * which is a rather good thing for us.
21049ed24f4bSMarc Zyngier *
21059ed24f4bSMarc Zyngier * Also, it is only used when turning caches on/off ("The expected
21069ed24f4bSMarc Zyngier * usage of the cache maintenance instructions that operate by set/way
21079ed24f4bSMarc Zyngier * is associated with the cache maintenance instructions associated
21089ed24f4bSMarc Zyngier * with the powerdown and powerup of caches, if this is required by
21099ed24f4bSMarc Zyngier * the implementation.").
21109ed24f4bSMarc Zyngier *
21119ed24f4bSMarc Zyngier * We use the following policy:
21129ed24f4bSMarc Zyngier *
21139ed24f4bSMarc Zyngier * - If we trap a S/W operation, we enable VM trapping to detect
21149ed24f4bSMarc Zyngier * caches being turned on/off, and do a full clean.
21159ed24f4bSMarc Zyngier *
21169ed24f4bSMarc Zyngier * - We flush the caches on both caches being turned on and off.
21179ed24f4bSMarc Zyngier *
21189ed24f4bSMarc Zyngier * - Once the caches are enabled, we stop trapping VM ops.
21199ed24f4bSMarc Zyngier */
21209ed24f4bSMarc Zyngier void kvm_set_way_flush(struct kvm_vcpu *vcpu)
21219ed24f4bSMarc Zyngier {
21229ed24f4bSMarc Zyngier unsigned long hcr = *vcpu_hcr(vcpu);
21239ed24f4bSMarc Zyngier
21249ed24f4bSMarc Zyngier /*
21259ed24f4bSMarc Zyngier * If this is the first time we do a S/W operation
21269ed24f4bSMarc Zyngier * (i.e. HCR_TVM not set) flush the whole memory, and set the
21279ed24f4bSMarc Zyngier * VM trapping.
21289ed24f4bSMarc Zyngier *
21299ed24f4bSMarc Zyngier * Otherwise, rely on the VM trapping to wait for the MMU +
21309ed24f4bSMarc Zyngier * Caches to be turned off. At that point, we'll be able to
21319ed24f4bSMarc Zyngier * clean the caches again.
21329ed24f4bSMarc Zyngier */
21339ed24f4bSMarc Zyngier if (!(hcr & HCR_TVM)) {
21349ed24f4bSMarc Zyngier trace_kvm_set_way_flush(*vcpu_pc(vcpu),
21359ed24f4bSMarc Zyngier vcpu_has_cache_enabled(vcpu));
21369ed24f4bSMarc Zyngier stage2_flush_vm(vcpu->kvm);
21379ed24f4bSMarc Zyngier *vcpu_hcr(vcpu) = hcr | HCR_TVM;
21389ed24f4bSMarc Zyngier }
21399ed24f4bSMarc Zyngier }
21409ed24f4bSMarc Zyngier
21419ed24f4bSMarc Zyngier void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
21429ed24f4bSMarc Zyngier {
21439ed24f4bSMarc Zyngier bool now_enabled = vcpu_has_cache_enabled(vcpu);
21449ed24f4bSMarc Zyngier
21459ed24f4bSMarc Zyngier /*
21469ed24f4bSMarc Zyngier * If switching the MMU+caches on, need to invalidate the caches.
21479ed24f4bSMarc Zyngier * If switching it off, need to clean the caches.
21489ed24f4bSMarc Zyngier * Clean + invalidate does the trick always.
21499ed24f4bSMarc Zyngier */
21509ed24f4bSMarc Zyngier if (now_enabled != was_enabled)
21519ed24f4bSMarc Zyngier stage2_flush_vm(vcpu->kvm);
21529ed24f4bSMarc Zyngier
21539ed24f4bSMarc Zyngier /* Caches are now on, stop trapping VM ops (until a S/W op) */
21549ed24f4bSMarc Zyngier if (now_enabled)
21559ed24f4bSMarc Zyngier *vcpu_hcr(vcpu) &= ~HCR_TVM;
21569ed24f4bSMarc Zyngier
21579ed24f4bSMarc Zyngier trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
21589ed24f4bSMarc Zyngier }
2159