// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>

#include <nvhe/early_alloc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/spinlock.h>

struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;

struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;

static u64 __io_map_base;

struct hyp_fixmap_slot {
	u64 addr;
	kvm_pte_t *ptep;
};
static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots);

static int __pkvm_create_mappings(unsigned long start, unsigned long size,
				  unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	hyp_spin_lock(&pkvm_pgd_lock);
	err = kvm_pgtable_hyp_map(&pkvm_pgtable, start, size, phys, prot);
	hyp_spin_unlock(&pkvm_pgd_lock);

	return err;
}

/**
 * pkvm_alloc_private_va_range - Allocates a private VA range.
 * @size:	The size of the VA range to reserve.
 * @haddr:	The hypervisor virtual start address of the allocation.
 *
 * The private virtual address (VA) range is allocated above __io_map_base
 * and aligned based on the order of @size.
 *
 * Return: 0 on success or negative error code on failure.
 */
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base, addr;
	int ret = 0;

	hyp_spin_lock(&pkvm_pgd_lock);

	/* Align the allocation based on the order of its size */
	addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));

	/* The allocated size is always a multiple of PAGE_SIZE */
	base = addr + PAGE_ALIGN(size);

	/* Are we overflowing on the vmemmap? */
	if (!addr || base > __hyp_vmemmap)
		ret = -ENOMEM;
	else {
		__io_map_base = base;
		*haddr = addr;
	}

	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
}

int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
				  enum kvm_pgtable_prot prot,
				  unsigned long *haddr)
{
	unsigned long addr;
	int err;

	size = PAGE_ALIGN(size + offset_in_page(phys));
	err = pkvm_alloc_private_va_range(size, &addr);
	if (err)
		return err;

	err = __pkvm_create_mappings(addr, size, phys, prot);
	if (err)
		return err;

	*haddr = addr + offset_in_page(phys);
	return err;
}

int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
{
	unsigned long start = (unsigned long)from;
	unsigned long end = (unsigned long)to;
	unsigned long virt_addr;
	phys_addr_t phys;

	hyp_assert_lock_held(&pkvm_pgd_lock);

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys = hyp_virt_to_phys((void *)virt_addr);
		err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE,
					  phys, prot);
		if (err)
			return err;
	}

	return 0;
}

int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	int ret;

	hyp_spin_lock(&pkvm_pgd_lock);
	ret = pkvm_create_mappings_locked(from, to, prot);
	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
}

int hyp_back_vmemmap(phys_addr_t back)
{
	unsigned long i, start, size, end = 0;
	int ret;

	for (i = 0; i < hyp_memblock_nr; i++) {
		start = hyp_memory[i].base;
		start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
		/*
		 * The beginning of the
		 * hyp_vmemmap region for the current memblock may already be
		 * backed by the page backing the end of the previous region,
		 * so avoid mapping it twice.
		 */
		start = max(start, end);

		end = hyp_memory[i].base + hyp_memory[i].size;
		end = PAGE_ALIGN((u64)hyp_phys_to_page(end));
		if (start >= end)
			continue;

		size = end - start;
		ret = __pkvm_create_mappings(start, size, back, PAGE_HYP);
		if (ret)
			return ret;

		memset(hyp_phys_to_virt(back), 0, size);
		back += size;
	}

	return 0;
}

static void *__hyp_bp_vect_base;
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
{
	void *vector;

	switch (slot) {
	case HYP_VECTOR_DIRECT: {
		vector = __kvm_hyp_vector;
		break;
	}
	case HYP_VECTOR_SPECTRE_DIRECT: {
		vector = __bp_harden_hyp_vecs;
		break;
	}
	case HYP_VECTOR_INDIRECT:
	case HYP_VECTOR_SPECTRE_INDIRECT: {
		vector = (void *)__hyp_bp_vect_base;
		break;
	}
	default:
		return -EINVAL;
	}

	vector = __kvm_vector_slot2addr(vector, slot);
	*this_cpu_ptr(&kvm_hyp_vector) = (unsigned long)vector;

	return 0;
}

int hyp_map_vectors(void)
{
	phys_addr_t phys;
	unsigned long bp_base;
	int ret;

	if (!kvm_system_needs_idmapped_vectors()) {
		__hyp_bp_vect_base = __bp_harden_hyp_vecs;
		return 0;
	}

	phys = __hyp_pa(__bp_harden_hyp_vecs);
	ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
					    PAGE_HYP_EXEC, &bp_base);
	if (ret)
		return ret;

	__hyp_bp_vect_base = (void *)bp_base;

	return 0;
}

void *hyp_fixmap_map(phys_addr_t phys)
{
	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
	kvm_pte_t pte, *ptep = slot->ptep;
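	/*
	 * Install the mapping by hand: clear the stale output-address bits,
	 * then set the new physical address along with the valid bit. No
	 * break-before-make sequence is needed for this invalid -> valid
	 * transition, since the slot's PTE was left invalid by
	 * fixmap_clear_slot(); the dsb(ishst) below makes the new entry
	 * visible to the walker before the caller dereferences the VA.
	 */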
	pte = *ptep;
	pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID);
	pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
	WRITE_ONCE(*ptep, pte);
	dsb(ishst);

	return (void *)slot->addr;
}

static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
{
	kvm_pte_t *ptep = slot->ptep;
	u64 addr = slot->addr;

	WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);

	/*
	 * Irritatingly, the architecture requires that we use inner-shareable
	 * broadcast TLB invalidation here in case another CPU speculates
	 * through our fixmap and decides to create an "amalgamation of the
	 * values held in the TLB" due to the apparent lack of a
	 * break-before-make sequence.
	 *
	 * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
	 */
	dsb(ishst);
	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
	dsb(ish);
	isb();
}

void hyp_fixmap_unmap(void)
{
	fixmap_clear_slot(this_cpu_ptr(&fixmap_slots));
}

static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				   enum kvm_pgtable_walk_flags flag,
				   void * const arg)
{
	struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg);

	if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
		return -EINVAL;

	slot->addr = addr;
	slot->ptep = ptep;

	/*
	 * Clear the PTE, but keep the page-table page refcount elevated to
	 * prevent it from ever being freed. This lets us manipulate the PTEs
	 * by hand safely without ever needing to allocate memory.
	 */
	fixmap_clear_slot(slot);

	return 0;
}

static int create_fixmap_slot(u64 addr, u64 cpu)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __create_fixmap_slot_cb,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= (void *)cpu,
	};

	return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}

int hyp_create_pcpu_fixmap(void)
{
	unsigned long addr, i;
	int ret;

	for (i = 0; i < hyp_nr_cpus; i++) {
		ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
		if (ret)
			return ret;

		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
					  __hyp_pa(__hyp_bss_start), PAGE_HYP);
		if (ret)
			return ret;

		ret = create_fixmap_slot(addr, i);
		if (ret)
			return ret;
	}

	return 0;
}

int hyp_create_idmap(u32 hyp_va_bits)
{
	unsigned long start, end;

	start = hyp_virt_to_phys((void *)__hyp_idmap_text_start);
	start = ALIGN_DOWN(start, PAGE_SIZE);

	end = hyp_virt_to_phys((void *)__hyp_idmap_text_end);
	end = ALIGN(end, PAGE_SIZE);

	/*
	 * One half of the VA space is reserved to linearly map portions of
	 * memory -- see va_layout.c for more details. The other half of the VA
	 * space contains the trampoline page, and needs some care. Split that
	 * second half in two and find the quarter of VA space not conflicting
	 * with the idmap to place the IOs and the vmemmap. IOs use the lower
	 * half of the quarter and the vmemmap the upper half.
	 */
	__io_map_base = start & BIT(hyp_va_bits - 2);
	__io_map_base ^= BIT(hyp_va_bits - 2);
	__hyp_vmemmap = __io_map_base | BIT(hyp_va_bits - 3);

	return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
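/*
 * Example usage of the per-CPU fixmap (an illustrative sketch; "page_phys"
 * is a hypothetical stand-in for the physical address of a hypervisor-owned
 * page). Once hyp_create_pcpu_fixmap() has run, a caller can temporarily map
 * one page at a time without allocating any memory:
 *
 *	void *va = hyp_fixmap_map(page_phys);
 *	memset(va, 0, PAGE_SIZE);
 *	hyp_fixmap_unmap();
 *
 * Because each slot is per-CPU, the map and unmap must happen on the same
 * CPU, with no migration in between.
 */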