// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>

#include <hyp/switch.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;

static struct hyp_pool host_s2_pool;

/*
 * Copies of the host's CPU feature registers holding sanitized values.
 */
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;

const u8 pkvm_hyp_id = 1;

static void *host_s2_zalloc_pages_exact(size_t size)
{
	return hyp_alloc_pages(&host_s2_pool, get_order(size));
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					  id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_kvm.lock);

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
					&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
	mmu->arch = &host_kvm.arch;
	mmu->pgt = &host_kvm.pgt;
	WRITE_ONCE(mmu->vmid.vmid_gen, 0);
	WRITE_ONCE(mmu->vmid.vmid, 0);

	return 0;
}

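/*
 * Enable the host stage-2 on the calling CPU: install the VTCR and VTTBR
 * derived from host_kvm, set HCR_EL2.VM, and invalidate any stale
 * stage-1+2 TLB entries so that the new translation regime takes effect
 * before returning to the host.
 */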
int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = host_kvm.arch.vtcr;
	params->hcr_el2 |= HCR_VM;
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_kvm.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return true;
		}
	}

	return false;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
				      prot, &host_s2_pool);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_kvm.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	 })

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

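/*
 * Shrink @range to the largest naturally aligned block around @addr that can
 * be installed with a single block (or page) mapping while staying within the
 * memory or MMIO range computed by the caller. Returns -EAGAIN if a valid
 * mapping already covers @addr, and -EPERM if the entry has been annotated
 * with a non-default owner.
 */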
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u32 level;
	int ret;

	hyp_assert_lock_held(&host_kvm.lock);
	ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte)
		return -EPERM;

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
			!(kvm_level_supports_block_mapping(level) &&
			  range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	hyp_assert_lock_held(&host_kvm.lock);

	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	hyp_assert_lock_held(&host_kvm.lock);

	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
			       addr, size, &host_s2_pool, owner_id);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, thus avoiding the loss of state caused by side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	hyp_spin_lock(&host_kvm.lock);
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	hyp_spin_unlock(&host_kvm.lock);

	return ret;
}

static inline bool check_prot(enum kvm_pgtable_prot prot,
			      enum kvm_pgtable_prot required,
			      enum kvm_pgtable_prot denied)
{
	return (prot & (required | denied)) == required;
}

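/*
 * Share the page at @pfn between the host and the hypervisor: the page ends
 * up mapped SHARED_OWNED in the host stage-2 and SHARED_BORROWED in the
 * hypervisor stage-1. Sharing a page that has already been shared is
 * tolerated as long as both page-tables agree on its state; pages that are
 * not backed by memory, or whose current state forbids the transition, are
 * rejected with -EINVAL or -EPERM respectively.
 */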
int __pkvm_host_share_hyp(u64 pfn)
{
	phys_addr_t addr = hyp_pfn_to_phys(pfn);
	enum kvm_pgtable_prot prot, cur;
	void *virt = __hyp_va(addr);
	enum pkvm_page_state state;
	kvm_pte_t pte;
	int ret;

	if (!addr_is_memory(addr))
		return -EINVAL;

	hyp_spin_lock(&host_kvm.lock);
	hyp_spin_lock(&pkvm_pgd_lock);

	ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
	if (ret)
		goto unlock;
	if (!pte)
		goto map_shared;

	/*
	 * Check attributes in the host stage-2 PTE. We need the page to be:
	 *  - mapped RWX as we're sharing memory;
	 *  - not borrowed, as that implies absence of ownership.
	 * Otherwise, we can't let it go through.
	 */
	cur = kvm_pgtable_stage2_pte_prot(pte);
	prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
	if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
		ret = -EPERM;
		goto unlock;
	}

	state = pkvm_getstate(cur);
	if (state == PKVM_PAGE_OWNED)
		goto map_shared;

	/*
	 * Tolerate double-sharing the same page, but this requires
	 * cross-checking the hypervisor stage-1.
	 */
	if (state != PKVM_PAGE_SHARED_OWNED) {
		ret = -EPERM;
		goto unlock;
	}

	ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
	if (ret)
		goto unlock;

	/*
	 * If the page has been shared with the hypervisor, it must already be
	 * mapped as SHARED_BORROWED in its stage-1.
	 */
	cur = kvm_pgtable_hyp_pte_prot(pte);
	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	if (!check_prot(cur, prot, ~prot))
		ret = -EPERM;
	goto unlock;

map_shared:
	/*
	 * If the page is not yet shared, adjust mappings in both page-tables
	 * while both locks are held.
	 */
	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
	BUG_ON(ret);

	prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
	ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
	BUG_ON(ret);

unlock:
	hyp_spin_unlock(&pkvm_pgd_lock);
	hyp_spin_unlock(&host_kvm.lock);

	return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	BUG_ON(!__get_fault_info(esr, &fault));

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}