// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

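/*
 * Flipped in pkvm_drop_host_privileges() just before the host stage-2 is
 * installed, after which enabling a static key may no longer be possible.
 */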
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

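/* Base and size of the memory region reserved by kvm_hyp_reserve(). */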
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

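/* sort() comparator: order hyp memblock regions by ascending base address. */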
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

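/* Keep the copy of the memblock regions shared with EL2 sorted by base. */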
static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

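/*
 * Snapshot the kernel's memblock regions into the hyp_memory array shared
 * with EL2, then sort it. Fails with -ENOMEM if there are more regions
 * than the HYP_MEMBLOCK_REGIONS the hypervisor can accommodate.
 */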
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

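/*
 * Reserve a physically contiguous chunk of memory for the pKVM hypervisor,
 * early enough that memblock allocations are still possible. The size
 * accounts for the hyp stage-1 page-tables, the host stage-2 page-tables,
 * the VM table, the hyp vmemmap and the FF-A proxy buffers. A no-op unless
 * the kernel booted at EL2 in protected mode (kvm-arm.mode=protected).
 */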
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE
	 * alignment if that fails.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

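/*
 * Tear down the VM at EL2 (if a handle was ever issued), clear the handle
 * and free any pages the hypervisor returned through the teardown memcache.
 */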
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus is expected to be sequential, starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

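/*
 * Lazily create the EL2 side of the VM on first use, serialized by the
 * config lock. Subsequent calls are no-ops once a handle exists.
 */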
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

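/* Tear down the EL2 side of the VM, serialized against creation. */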
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

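/* One-time host-side initialization of the pkvm state for a new VM. */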
int pkvm_init_host_vm(struct kvm *host_kvm)
{
	mutex_init(&host_kvm->lock);
	return 0;
}

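/*
 * Per-CPU callback: finalize host stage-2 protection on this CPU via the
 * __pkvm_prot_finalize hypercall, recording any failure through *arg.
 */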
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

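/*
 * De-privilege the host on all CPUs. Once this returns successfully, the
 * host runs under a stage-2 controlled by the hypervisor.
 */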
static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

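/*
 * Late initcall: once KVM itself has initialized, hide the hyp sections
 * from kmemleak and drop the host's privileges.
 */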
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);