// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}
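
/*
 * Worked example for the sizing above (illustrative numbers only, assuming
 * 4KiB pages and 2MiB PMDs; the real figures come from the hyp_*_pages()
 * helpers): if hyp_mem_pages adds up to 3000 pages, hyp_mem_size is
 * 3000 << 12, i.e. ~11.7MiB. The first memblock_phys_alloc() then requests
 * ALIGN(~11.7MiB, 2MiB) = 12MiB at 2MiB alignment, so that unmapping the
 * region from the host stage-2 can use block mappings. Only if that
 * allocation fails do we settle for the exact size at PAGE_SIZE alignment.
 */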

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus needs to be sequential, starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	mutex_init(&host_kvm->lock);
	return 0;
}

static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);
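
/*
 * Illustrative call flow (a sketch of the expected callers, which live
 * outside this file, assuming the usual KVM/arm64 entry points):
 *
 *	pkvm_init_host_vm(kvm);		// at VM creation
 *	...
 *	pkvm_create_hyp_vm(kvm);	// on first vcpu run; idempotent
 *					// thanks to the pkvm.handle check
 *	...
 *	pkvm_destroy_hyp_vm(kvm);	// at VM teardown; reclaims the
 *					// pages donated to EL2
 *
 * finalize_pkvm() is not part of this per-VM flow: it runs once at boot,
 * after device initcalls, to install the host stage-2 and deprivilege
 * the host.
 */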