1d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
29e04ba69SPaul Mackerras /*
39e04ba69SPaul Mackerras  *
49e04ba69SPaul Mackerras  * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
59e04ba69SPaul Mackerras  */
69e04ba69SPaul Mackerras 
79e04ba69SPaul Mackerras #include <linux/types.h>
89e04ba69SPaul Mackerras #include <linux/string.h>
99e04ba69SPaul Mackerras #include <linux/kvm.h>
109e04ba69SPaul Mackerras #include <linux/kvm_host.h>
119a94d3eeSPaul Mackerras #include <linux/anon_inodes.h>
129a94d3eeSPaul Mackerras #include <linux/file.h>
139a94d3eeSPaul Mackerras #include <linux/debugfs.h>
1465fddcfcSMike Rapoport #include <linux/pgtable.h>
159e04ba69SPaul Mackerras 
169e04ba69SPaul Mackerras #include <asm/kvm_ppc.h>
179e04ba69SPaul Mackerras #include <asm/kvm_book3s.h>
18*abcaadd4SJordan Niethe #include "book3s_hv.h"
199e04ba69SPaul Mackerras #include <asm/page.h>
209e04ba69SPaul Mackerras #include <asm/mmu.h>
219e04ba69SPaul Mackerras #include <asm/pgalloc.h>
2294171b19SAneesh Kumar K.V #include <asm/pte-walk.h>
23008e359cSBharata B Rao #include <asm/ultravisor.h>
24008e359cSBharata B Rao #include <asm/kvm_book3s_uvmem.h>
2581468083SBharata B Rao #include <asm/plpar_wrappers.h>
2646d60bdbSChristophe Leroy #include <asm/firmware.h>
279e04ba69SPaul Mackerras 
289e04ba69SPaul Mackerras /*
299e04ba69SPaul Mackerras  * Supported radix tree geometry.
309e04ba69SPaul Mackerras  * Like p9, we support either 5 or 9 bits at the first (lowest) level,
319e04ba69SPaul Mackerras  * for a page size of 64k or 4k.
329e04ba69SPaul Mackerras  */
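/*
 * The index is the tree level (3 = root, 0 = leaf PTE level), so a 52-bit
 * effective address decomposes as 13 + 9 + 9 + 9 index bits plus a 12-bit
 * page offset (4k pages), or 13 + 9 + 9 + 5 index bits plus a 16-bit page
 * offset (64k pages).
 */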
339e04ba69SPaul Mackerras static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
349e04ba69SPaul Mackerras 
356ff887b8SSuraj Jitindar Singh unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
36d7b45615SSuraj Jitindar Singh 					      gva_t eaddr, void *to, void *from,
37d7b45615SSuraj Jitindar Singh 					      unsigned long n)
38d7b45615SSuraj Jitindar Singh {
393f649ab7SKees Cook 	int old_pid, old_lpid;
40d7b45615SSuraj Jitindar Singh 	unsigned long quadrant, ret = n;
41d7b45615SSuraj Jitindar Singh 	bool is_load = !!to;
42d7b45615SSuraj Jitindar Singh 
4395d386c2SSuraj Jitindar Singh 	/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
4495d386c2SSuraj Jitindar Singh 	if (kvmhv_on_pseries())
4595d386c2SSuraj Jitindar Singh 		return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
46c1ed1754SAneesh Kumar K.V 					  (to != NULL) ? __pa(to): 0,
47c1ed1754SAneesh Kumar K.V 					  (from != NULL) ? __pa(from): 0, n);
48d7b45615SSuraj Jitindar Singh 
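	/*
	 * Only a 52-bit effective address space is supported, so bail out
	 * (reporting all n bytes as not copied) if any of bits 52..63 of
	 * the address are set.
	 */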
49c232461cSFabiano Rosas 	if (eaddr & (0xFFFUL << 52))
50c232461cSFabiano Rosas 		return ret;
51c232461cSFabiano Rosas 
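	/*
	 * In hypervisor state the top two effective address bits select the
	 * translation quadrant: quadrant 1 translates with effLPID = LPIDR
	 * and effPID = PIDR (guest user addresses), quadrant 2 with
	 * effLPID = LPIDR and effPID = 0 (guest kernel addresses).  Encoding
	 * the quadrant into the address and temporarily switching LPID/PID
	 * below lets the host copy directly to/from the guest address space.
	 */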
52d7b45615SSuraj Jitindar Singh 	quadrant = 1;
53d7b45615SSuraj Jitindar Singh 	if (!pid)
54d7b45615SSuraj Jitindar Singh 		quadrant = 2;
55d7b45615SSuraj Jitindar Singh 	if (is_load)
56d7b45615SSuraj Jitindar Singh 		from = (void *) (eaddr | (quadrant << 62));
57d7b45615SSuraj Jitindar Singh 	else
58d7b45615SSuraj Jitindar Singh 		to = (void *) (eaddr | (quadrant << 62));
59d7b45615SSuraj Jitindar Singh 
60d7b45615SSuraj Jitindar Singh 	preempt_disable();
61d7b45615SSuraj Jitindar Singh 
62cf3b16cfSNicholas Piggin 	asm volatile("hwsync" ::: "memory");
63cf3b16cfSNicholas Piggin 	isync();
64d7b45615SSuraj Jitindar Singh 	/* switch the lpid first to avoid running host with unallocated pid */
65d7b45615SSuraj Jitindar Singh 	old_lpid = mfspr(SPRN_LPID);
66d7b45615SSuraj Jitindar Singh 	if (old_lpid != lpid)
67d7b45615SSuraj Jitindar Singh 		mtspr(SPRN_LPID, lpid);
68d7b45615SSuraj Jitindar Singh 	if (quadrant == 1) {
69d7b45615SSuraj Jitindar Singh 		old_pid = mfspr(SPRN_PID);
70d7b45615SSuraj Jitindar Singh 		if (old_pid != pid)
71d7b45615SSuraj Jitindar Singh 			mtspr(SPRN_PID, pid);
72d7b45615SSuraj Jitindar Singh 	}
73d7b45615SSuraj Jitindar Singh 	isync();
74d7b45615SSuraj Jitindar Singh 
755d7d6dacSFabiano Rosas 	pagefault_disable();
76d7b45615SSuraj Jitindar Singh 	if (is_load)
775d7d6dacSFabiano Rosas 		ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
78d7b45615SSuraj Jitindar Singh 	else
795d7d6dacSFabiano Rosas 		ret = __copy_to_user_inatomic((void __user *)to, from, n);
805d7d6dacSFabiano Rosas 	pagefault_enable();
81d7b45615SSuraj Jitindar Singh 
82cf3b16cfSNicholas Piggin 	asm volatile("hwsync" ::: "memory");
83cf3b16cfSNicholas Piggin 	isync();
84d7b45615SSuraj Jitindar Singh 	/* switch the pid first to avoid running host with unallocated pid */
85d7b45615SSuraj Jitindar Singh 	if (quadrant == 1 && pid != old_pid)
86d7b45615SSuraj Jitindar Singh 		mtspr(SPRN_PID, old_pid);
87d7b45615SSuraj Jitindar Singh 	if (lpid != old_lpid)
88d7b45615SSuraj Jitindar Singh 		mtspr(SPRN_LPID, old_lpid);
89d7b45615SSuraj Jitindar Singh 	isync();
90d7b45615SSuraj Jitindar Singh 
91d7b45615SSuraj Jitindar Singh 	preempt_enable();
92d7b45615SSuraj Jitindar Singh 
93d7b45615SSuraj Jitindar Singh 	return ret;
94d7b45615SSuraj Jitindar Singh }
95d7b45615SSuraj Jitindar Singh 
96d7b45615SSuraj Jitindar Singh static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
97d7b45615SSuraj Jitindar Singh 					  void *to, void *from, unsigned long n)
98d7b45615SSuraj Jitindar Singh {
99d7b45615SSuraj Jitindar Singh 	int lpid = vcpu->kvm->arch.lpid;
100d7b45615SSuraj Jitindar Singh 	int pid = vcpu->arch.pid;
101d7b45615SSuraj Jitindar Singh 
102d7b45615SSuraj Jitindar Singh 	/* This would cause a data segment intr so don't allow the access */
103d7b45615SSuraj Jitindar Singh 	if (eaddr & (0x3FFUL << 52))
104d7b45615SSuraj Jitindar Singh 		return -EINVAL;
105d7b45615SSuraj Jitindar Singh 
106d7b45615SSuraj Jitindar Singh 	/* Should we be using the nested lpid */
107d7b45615SSuraj Jitindar Singh 	if (vcpu->arch.nested)
108d7b45615SSuraj Jitindar Singh 		lpid = vcpu->arch.nested->shadow_lpid;
109d7b45615SSuraj Jitindar Singh 
110d7b45615SSuraj Jitindar Singh 	/* If accessing quadrant 3 then pid is expected to be 0 */
111d7b45615SSuraj Jitindar Singh 	if (((eaddr >> 62) & 0x3) == 0x3)
112d7b45615SSuraj Jitindar Singh 		pid = 0;
113d7b45615SSuraj Jitindar Singh 
114d7b45615SSuraj Jitindar Singh 	eaddr &= ~(0xFFFUL << 52);
115d7b45615SSuraj Jitindar Singh 
116d7b45615SSuraj Jitindar Singh 	return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
117d7b45615SSuraj Jitindar Singh }
118d7b45615SSuraj Jitindar Singh 
119d7b45615SSuraj Jitindar Singh long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
120d7b45615SSuraj Jitindar Singh 				 unsigned long n)
121d7b45615SSuraj Jitindar Singh {
122d7b45615SSuraj Jitindar Singh 	long ret;
123d7b45615SSuraj Jitindar Singh 
124d7b45615SSuraj Jitindar Singh 	ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
125d7b45615SSuraj Jitindar Singh 	if (ret > 0)
126d7b45615SSuraj Jitindar Singh 		memset(to + (n - ret), 0, ret);
127d7b45615SSuraj Jitindar Singh 
128d7b45615SSuraj Jitindar Singh 	return ret;
129d7b45615SSuraj Jitindar Singh }
130d7b45615SSuraj Jitindar Singh 
131d7b45615SSuraj Jitindar Singh long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
132d7b45615SSuraj Jitindar Singh 			       unsigned long n)
133d7b45615SSuraj Jitindar Singh {
134d7b45615SSuraj Jitindar Singh 	return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
135d7b45615SSuraj Jitindar Singh }
136d7b45615SSuraj Jitindar Singh 
137fd10be25SSuraj Jitindar Singh int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
138fd10be25SSuraj Jitindar Singh 			       struct kvmppc_pte *gpte, u64 root,
139fd10be25SSuraj Jitindar Singh 			       u64 *pte_ret_p)
1409e04ba69SPaul Mackerras {
1419e04ba69SPaul Mackerras 	struct kvm *kvm = vcpu->kvm;
1429e04ba69SPaul Mackerras 	int ret, level, ps;
143fd10be25SSuraj Jitindar Singh 	unsigned long rts, bits, offset, index;
1449811c78eSSuraj Jitindar Singh 	u64 pte, base, gpa;
1459811c78eSSuraj Jitindar Singh 	__be64 rpte;
1469e04ba69SPaul Mackerras 
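	/*
	 * The radix tree size (RTS) field is split across two fields of the
	 * root entry; the tree maps an address space of 2^(RTS + 31) bytes,
	 * so RTS must be 21 for the 52-bit space supported below.
	 */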
1479e04ba69SPaul Mackerras 	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
1489e04ba69SPaul Mackerras 		((root & RTS2_MASK) >> RTS2_SHIFT);
1499e04ba69SPaul Mackerras 	bits = root & RPDS_MASK;
1509811c78eSSuraj Jitindar Singh 	base = root & RPDB_MASK;
1519e04ba69SPaul Mackerras 
1529e04ba69SPaul Mackerras 	offset = rts + 31;
1539e04ba69SPaul Mackerras 
1549811c78eSSuraj Jitindar Singh 	/* Current implementations only support 52-bit space */
1559e04ba69SPaul Mackerras 	if (offset != 52)
1569e04ba69SPaul Mackerras 		return -EINVAL;
1579e04ba69SPaul Mackerras 
1589811c78eSSuraj Jitindar Singh 	/* Walk each level of the radix tree */
1599e04ba69SPaul Mackerras 	for (level = 3; level >= 0; --level) {
160fd10be25SSuraj Jitindar Singh 		u64 addr;
1619811c78eSSuraj Jitindar Singh 		/* Check a valid size */
1629e04ba69SPaul Mackerras 		if (level && bits != p9_supported_radix_bits[level])
1639e04ba69SPaul Mackerras 			return -EINVAL;
1649e04ba69SPaul Mackerras 		if (level == 0 && !(bits == 5 || bits == 9))
1659e04ba69SPaul Mackerras 			return -EINVAL;
1669e04ba69SPaul Mackerras 		offset -= bits;
1679e04ba69SPaul Mackerras 		index = (eaddr >> offset) & ((1UL << bits) - 1);
1689811c78eSSuraj Jitindar Singh 		/* Check that low bits of page table base are zero */
1699811c78eSSuraj Jitindar Singh 		if (base & ((1UL << (bits + 3)) - 1))
1709e04ba69SPaul Mackerras 			return -EINVAL;
1719811c78eSSuraj Jitindar Singh 		/* Read the entry from guest memory */
172fd10be25SSuraj Jitindar Singh 		addr = base + (index * sizeof(rpte));
1732031f287SSean Christopherson 
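		/*
		 * kvm_read_guest() looks up the memslots, which are protected
		 * by kvm->srcu, so hold the SRCU read lock around the access.
		 */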
1742031f287SSean Christopherson 		kvm_vcpu_srcu_read_lock(vcpu);
175fd10be25SSuraj Jitindar Singh 		ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
1762031f287SSean Christopherson 		kvm_vcpu_srcu_read_unlock(vcpu);
177fd10be25SSuraj Jitindar Singh 		if (ret) {
178fd10be25SSuraj Jitindar Singh 			if (pte_ret_p)
179fd10be25SSuraj Jitindar Singh 				*pte_ret_p = addr;
1809e04ba69SPaul Mackerras 			return ret;
181fd10be25SSuraj Jitindar Singh 		}
1829e04ba69SPaul Mackerras 		pte = __be64_to_cpu(rpte);
1839e04ba69SPaul Mackerras 		if (!(pte & _PAGE_PRESENT))
1849e04ba69SPaul Mackerras 			return -ENOENT;
1859811c78eSSuraj Jitindar Singh 		/* Check if a leaf entry */
1869e04ba69SPaul Mackerras 		if (pte & _PAGE_PTE)
1879e04ba69SPaul Mackerras 			break;
1889811c78eSSuraj Jitindar Singh 		/* Get ready to walk the next level */
1899811c78eSSuraj Jitindar Singh 		base = pte & RPDB_MASK;
1909811c78eSSuraj Jitindar Singh 		bits = pte & RPDS_MASK;
1919e04ba69SPaul Mackerras 	}
1929811c78eSSuraj Jitindar Singh 
1939811c78eSSuraj Jitindar Singh 	/* Need a leaf at lowest level; 512GB pages not supported */
1949e04ba69SPaul Mackerras 	if (level < 0 || level == 3)
1959e04ba69SPaul Mackerras 		return -EINVAL;
1969e04ba69SPaul Mackerras 
1979811c78eSSuraj Jitindar Singh 	/* We found a valid leaf PTE */
1989811c78eSSuraj Jitindar Singh 	/* Offset is now log base 2 of the page size */
1999e04ba69SPaul Mackerras 	gpa = pte & 0x01fffffffffff000ul;
2009e04ba69SPaul Mackerras 	if (gpa & ((1ul << offset) - 1))
2019e04ba69SPaul Mackerras 		return -EINVAL;
2029811c78eSSuraj Jitindar Singh 	gpa |= eaddr & ((1ul << offset) - 1);
2039e04ba69SPaul Mackerras 	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
2049e04ba69SPaul Mackerras 		if (offset == mmu_psize_defs[ps].shift)
2059e04ba69SPaul Mackerras 			break;
2069e04ba69SPaul Mackerras 	gpte->page_size = ps;
207fd10be25SSuraj Jitindar Singh 	gpte->page_shift = offset;
2089e04ba69SPaul Mackerras 
2099e04ba69SPaul Mackerras 	gpte->eaddr = eaddr;
2109e04ba69SPaul Mackerras 	gpte->raddr = gpa;
2119e04ba69SPaul Mackerras 
2129e04ba69SPaul Mackerras 	/* Work out permissions */
2139e04ba69SPaul Mackerras 	gpte->may_read = !!(pte & _PAGE_READ);
2149e04ba69SPaul Mackerras 	gpte->may_write = !!(pte & _PAGE_WRITE);
2159e04ba69SPaul Mackerras 	gpte->may_execute = !!(pte & _PAGE_EXEC);
2169811c78eSSuraj Jitindar Singh 
217fd10be25SSuraj Jitindar Singh 	gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
218fd10be25SSuraj Jitindar Singh 
2199811c78eSSuraj Jitindar Singh 	if (pte_ret_p)
2209811c78eSSuraj Jitindar Singh 		*pte_ret_p = pte;
2219811c78eSSuraj Jitindar Singh 
2229811c78eSSuraj Jitindar Singh 	return 0;
2239811c78eSSuraj Jitindar Singh }
2249811c78eSSuraj Jitindar Singh 
225fd10be25SSuraj Jitindar Singh /*
226fd10be25SSuraj Jitindar Singh  * Used to walk a partition or process table radix tree in guest memory
227fd10be25SSuraj Jitindar Singh  * Note: We exploit the fact that a partition table and a process
228fd10be25SSuraj Jitindar Singh  * table have the same layout, a partition-scoped page table and a
229fd10be25SSuraj Jitindar Singh  * process-scoped page table have the same layout, and the 2nd
230fd10be25SSuraj Jitindar Singh  * doubleword of a partition table entry has the same layout as
231fd10be25SSuraj Jitindar Singh  * the PTCR register.
232fd10be25SSuraj Jitindar Singh  */
233fd10be25SSuraj Jitindar Singh int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
234fd10be25SSuraj Jitindar Singh 				     struct kvmppc_pte *gpte, u64 table,
235fd10be25SSuraj Jitindar Singh 				     int table_index, u64 *pte_ret_p)
236fd10be25SSuraj Jitindar Singh {
237fd10be25SSuraj Jitindar Singh 	struct kvm *kvm = vcpu->kvm;
238fd10be25SSuraj Jitindar Singh 	int ret;
239fd10be25SSuraj Jitindar Singh 	unsigned long size, ptbl, root;
240fd10be25SSuraj Jitindar Singh 	struct prtb_entry entry;
241fd10be25SSuraj Jitindar Singh 
242fd10be25SSuraj Jitindar Singh 	if ((table & PRTS_MASK) > 24)
243fd10be25SSuraj Jitindar Singh 		return -EINVAL;
244fd10be25SSuraj Jitindar Singh 	size = 1ul << ((table & PRTS_MASK) + 12);
245fd10be25SSuraj Jitindar Singh 
246fd10be25SSuraj Jitindar Singh 	/* Is the table big enough to contain this entry? */
247fd10be25SSuraj Jitindar Singh 	if ((table_index * sizeof(entry)) >= size)
248fd10be25SSuraj Jitindar Singh 		return -EINVAL;
249fd10be25SSuraj Jitindar Singh 
250fd10be25SSuraj Jitindar Singh 	/* Read the table to find the root of the radix tree */
251fd10be25SSuraj Jitindar Singh 	ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
2522031f287SSean Christopherson 	kvm_vcpu_srcu_read_lock(vcpu);
253fd10be25SSuraj Jitindar Singh 	ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
2542031f287SSean Christopherson 	kvm_vcpu_srcu_read_unlock(vcpu);
255fd10be25SSuraj Jitindar Singh 	if (ret)
256fd10be25SSuraj Jitindar Singh 		return ret;
257fd10be25SSuraj Jitindar Singh 
258fd10be25SSuraj Jitindar Singh 	/* Root is stored in the first double word */
259fd10be25SSuraj Jitindar Singh 	root = be64_to_cpu(entry.prtb0);
260fd10be25SSuraj Jitindar Singh 
261fd10be25SSuraj Jitindar Singh 	return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
262fd10be25SSuraj Jitindar Singh }
263fd10be25SSuraj Jitindar Singh 
2649e04ba69SPaul Mackerras int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
2659e04ba69SPaul Mackerras 			   struct kvmppc_pte *gpte, bool data, bool iswrite)
2669e04ba69SPaul Mackerras {
2679e04ba69SPaul Mackerras 	u32 pid;
2689811c78eSSuraj Jitindar Singh 	u64 pte;
2699811c78eSSuraj Jitindar Singh 	int ret;
2709e04ba69SPaul Mackerras 
2719e04ba69SPaul Mackerras 	/* Work out effective PID */
2729e04ba69SPaul Mackerras 	switch (eaddr >> 62) {
2739e04ba69SPaul Mackerras 	case 0:
2749e04ba69SPaul Mackerras 		pid = vcpu->arch.pid;
2759e04ba69SPaul Mackerras 		break;
2769e04ba69SPaul Mackerras 	case 3:
2779e04ba69SPaul Mackerras 		pid = 0;
2789e04ba69SPaul Mackerras 		break;
2799e04ba69SPaul Mackerras 	default:
2809e04ba69SPaul Mackerras 		return -EINVAL;
2819e04ba69SPaul Mackerras 	}
2829e04ba69SPaul Mackerras 
2839811c78eSSuraj Jitindar Singh 	ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
2849811c78eSSuraj Jitindar Singh 				vcpu->kvm->arch.process_table, pid, &pte);
2859e04ba69SPaul Mackerras 	if (ret)
2869e04ba69SPaul Mackerras 		return ret;
2879e04ba69SPaul Mackerras 
2889811c78eSSuraj Jitindar Singh 	/* Check privilege (applies only to process scoped translations) */
2899e04ba69SPaul Mackerras 	if (kvmppc_get_msr(vcpu) & MSR_PR) {
2909e04ba69SPaul Mackerras 		if (pte & _PAGE_PRIVILEGED) {
2919e04ba69SPaul Mackerras 			gpte->may_read = 0;
2929e04ba69SPaul Mackerras 			gpte->may_write = 0;
2939e04ba69SPaul Mackerras 			gpte->may_execute = 0;
2949e04ba69SPaul Mackerras 		}
2959e04ba69SPaul Mackerras 	} else {
2969e04ba69SPaul Mackerras 		if (!(pte & _PAGE_PRIVILEGED)) {
2979e04ba69SPaul Mackerras 			/* Check AMR/IAMR to see if strict mode is in force */
298*abcaadd4SJordan Niethe 			if (kvmppc_get_amr_hv(vcpu) & (1ul << 62))
2999e04ba69SPaul Mackerras 				gpte->may_read = 0;
300*abcaadd4SJordan Niethe 			if (kvmppc_get_amr_hv(vcpu) & (1ul << 63))
3019e04ba69SPaul Mackerras 				gpte->may_write = 0;
3029e04ba69SPaul Mackerras 			if (vcpu->arch.iamr & (1ul << 62))
3039e04ba69SPaul Mackerras 				gpte->may_execute = 0;
3049e04ba69SPaul Mackerras 		}
3059e04ba69SPaul Mackerras 	}
3069e04ba69SPaul Mackerras 
3079e04ba69SPaul Mackerras 	return 0;
3089e04ba69SPaul Mackerras }
3099e04ba69SPaul Mackerras 
31090165d3dSSuraj Jitindar Singh void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
311fd10be25SSuraj Jitindar Singh 			     unsigned int pshift, unsigned int lpid)
3125a319350SPaul Mackerras {
313d91cb39fSNicholas Piggin 	unsigned long psize = PAGE_SIZE;
314690ed4caSPaul Mackerras 	int psi;
315690ed4caSPaul Mackerras 	long rc;
316690ed4caSPaul Mackerras 	unsigned long rb;
3175a319350SPaul Mackerras 
318d91cb39fSNicholas Piggin 	if (pshift)
319d91cb39fSNicholas Piggin 		psize = 1UL << pshift;
320690ed4caSPaul Mackerras 	else
321690ed4caSPaul Mackerras 		pshift = PAGE_SHIFT;
322d91cb39fSNicholas Piggin 
323d91cb39fSNicholas Piggin 	addr &= ~(psize - 1);
324690ed4caSPaul Mackerras 
325690ed4caSPaul Mackerras 	if (!kvmhv_on_pseries()) {
326fd10be25SSuraj Jitindar Singh 		radix__flush_tlb_lpid_page(lpid, addr, psize);
327690ed4caSPaul Mackerras 		return;
3285a319350SPaul Mackerras 	}
3295a319350SPaul Mackerras 
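	/*
	 * Running as a nested hypervisor on pseries: the partition-scoped
	 * tables are shadowed by the L0 hypervisor, so ask it to do the
	 * invalidation, preferring H_RPT_INVALIDATE when the firmware
	 * supports it and falling back to H_TLB_INVALIDATE otherwise.
	 */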
330690ed4caSPaul Mackerras 	psi = shift_to_mmu_psize(pshift);
33181468083SBharata B Rao 
33281468083SBharata B Rao 	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
333690ed4caSPaul Mackerras 		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
334690ed4caSPaul Mackerras 		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
335690ed4caSPaul Mackerras 					lpid, rb);
33681468083SBharata B Rao 	} else {
33781468083SBharata B Rao 		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
33881468083SBharata B Rao 					    H_RPTI_TYPE_NESTED |
33981468083SBharata B Rao 					    H_RPTI_TYPE_TLB,
34081468083SBharata B Rao 					    psize_to_rpti_pgsize(psi),
34181468083SBharata B Rao 					    addr, addr + psize);
34281468083SBharata B Rao 	}
34381468083SBharata B Rao 
344690ed4caSPaul Mackerras 	if (rc)
345690ed4caSPaul Mackerras 		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
3465a319350SPaul Mackerras }
3475a319350SPaul Mackerras 
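/*
 * Flush the page-walk cache for an LPID.  This is needed after freeing
 * intermediate page-table pages so the MMU does not keep cached pointers
 * to the freed tables.
 */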
348fd10be25SSuraj Jitindar Singh static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
349c4c8a764SPaul Mackerras {
350690ed4caSPaul Mackerras 	long rc;
351690ed4caSPaul Mackerras 
352690ed4caSPaul Mackerras 	if (!kvmhv_on_pseries()) {
353fd10be25SSuraj Jitindar Singh 		radix__flush_pwc_lpid(lpid);
354690ed4caSPaul Mackerras 		return;
355690ed4caSPaul Mackerras 	}
356690ed4caSPaul Mackerras 
35781468083SBharata B Rao 	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
358690ed4caSPaul Mackerras 		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
359690ed4caSPaul Mackerras 					lpid, TLBIEL_INVAL_SET_LPID);
36081468083SBharata B Rao 	else
36181468083SBharata B Rao 		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
36281468083SBharata B Rao 					    H_RPTI_TYPE_NESTED |
36381468083SBharata B Rao 					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
36481468083SBharata B Rao 					    0, -1UL);
365690ed4caSPaul Mackerras 	if (rc)
366690ed4caSPaul Mackerras 		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
367c4c8a764SPaul Mackerras }
368c4c8a764SPaul Mackerras 
369878cf2bbSNicholas Piggin static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
3708f7b79b8SPaul Mackerras 				      unsigned long clr, unsigned long set,
3718f7b79b8SPaul Mackerras 				      unsigned long addr, unsigned int shift)
3725a319350SPaul Mackerras {
3732bf1071aSNicholas Piggin 	return __radix_pte_update(ptep, clr, set);
3745a319350SPaul Mackerras }
3755a319350SPaul Mackerras 
376cf59eb13SWang Wensheng static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
3775a319350SPaul Mackerras 			     pte_t *ptep, pte_t pte)
3785a319350SPaul Mackerras {
3795a319350SPaul Mackerras 	radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
3805a319350SPaul Mackerras }
3815a319350SPaul Mackerras 
3825a319350SPaul Mackerras static struct kmem_cache *kvm_pte_cache;
38321828c99SAneesh Kumar K.V static struct kmem_cache *kvm_pmd_cache;
3845a319350SPaul Mackerras 
3855a319350SPaul Mackerras static pte_t *kvmppc_pte_alloc(void)
3865a319350SPaul Mackerras {
3870aca8a55SQian Cai 	pte_t *pte;
3880aca8a55SQian Cai 
3890aca8a55SQian Cai 	pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
3900aca8a55SQian Cai 	/* pmd_populate() will only reference _pa(pte). */
3910aca8a55SQian Cai 	kmemleak_ignore(pte);
3920aca8a55SQian Cai 
3930aca8a55SQian Cai 	return pte;
3945a319350SPaul Mackerras }
3955a319350SPaul Mackerras 
3965a319350SPaul Mackerras static void kvmppc_pte_free(pte_t *ptep)
3975a319350SPaul Mackerras {
3985a319350SPaul Mackerras 	kmem_cache_free(kvm_pte_cache, ptep);
3995a319350SPaul Mackerras }
4005a319350SPaul Mackerras 
40121828c99SAneesh Kumar K.V static pmd_t *kvmppc_pmd_alloc(void)
40221828c99SAneesh Kumar K.V {
4030aca8a55SQian Cai 	pmd_t *pmd;
4040aca8a55SQian Cai 
4050aca8a55SQian Cai 	pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
4060aca8a55SQian Cai 	/* pud_populate() will only reference _pa(pmd). */
4070aca8a55SQian Cai 	kmemleak_ignore(pmd);
4080aca8a55SQian Cai 
4090aca8a55SQian Cai 	return pmd;
41021828c99SAneesh Kumar K.V }
41121828c99SAneesh Kumar K.V 
41221828c99SAneesh Kumar K.V static void kvmppc_pmd_free(pmd_t *pmdp)
41321828c99SAneesh Kumar K.V {
41421828c99SAneesh Kumar K.V 	kmem_cache_free(kvm_pmd_cache, pmdp);
41521828c99SAneesh Kumar K.V }
41621828c99SAneesh Kumar K.V 
4178cf531edSSuraj Jitindar Singh /* Called with kvm->mmu_lock held */
4188cf531edSSuraj Jitindar Singh void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
419c43c3a86SPaul Mackerras 		      unsigned int shift,
420c43c3a86SPaul Mackerras 		      const struct kvm_memory_slot *memslot,
421fd10be25SSuraj Jitindar Singh 		      unsigned int lpid)
422a5fad1e9SNicholas Piggin 
423a5fad1e9SNicholas Piggin {
424a5fad1e9SNicholas Piggin 	unsigned long old;
4258cf531edSSuraj Jitindar Singh 	unsigned long gfn = gpa >> PAGE_SHIFT;
4268cf531edSSuraj Jitindar Singh 	unsigned long page_size = PAGE_SIZE;
4278cf531edSSuraj Jitindar Singh 	unsigned long hpa;
428a5fad1e9SNicholas Piggin 
429a5fad1e9SNicholas Piggin 	old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
430fd10be25SSuraj Jitindar Singh 	kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
431a5fad1e9SNicholas Piggin 
4328cf531edSSuraj Jitindar Singh 	/* The following only applies to L1 entries */
4338cf531edSSuraj Jitindar Singh 	if (lpid != kvm->arch.lpid)
4348cf531edSSuraj Jitindar Singh 		return;
4358cf531edSSuraj Jitindar Singh 
4368cf531edSSuraj Jitindar Singh 	if (!memslot) {
437a5fad1e9SNicholas Piggin 		memslot = gfn_to_memslot(kvm, gfn);
4388cf531edSSuraj Jitindar Singh 		if (!memslot)
4398cf531edSSuraj Jitindar Singh 			return;
440a5fad1e9SNicholas Piggin 	}
4418f1f7b9bSSuraj Jitindar Singh 	if (shift) { /* 1GB or 2MB page */
442f0f825f0SPaul Mackerras 		page_size = 1ul << shift;
4438f1f7b9bSSuraj Jitindar Singh 		if (shift == PMD_SHIFT)
4448f1f7b9bSSuraj Jitindar Singh 			kvm->stat.num_2M_pages--;
4458f1f7b9bSSuraj Jitindar Singh 		else if (shift == PUD_SHIFT)
4468f1f7b9bSSuraj Jitindar Singh 			kvm->stat.num_1G_pages--;
4478f1f7b9bSSuraj Jitindar Singh 	}
4488cf531edSSuraj Jitindar Singh 
4498cf531edSSuraj Jitindar Singh 	gpa &= ~(page_size - 1);
4508cf531edSSuraj Jitindar Singh 	hpa = old & PTE_RPN_MASK;
4518cf531edSSuraj Jitindar Singh 	kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
4528cf531edSSuraj Jitindar Singh 
4538cf531edSSuraj Jitindar Singh 	if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
454a5fad1e9SNicholas Piggin 		kvmppc_update_dirty_map(memslot, gfn, page_size);
455a5fad1e9SNicholas Piggin }
456a5fad1e9SNicholas Piggin 
457a5704e83SNicholas Piggin /*
458a5704e83SNicholas Piggin  * kvmppc_free_p?d are used to free existing page tables, and recursively
459a5704e83SNicholas Piggin  * descend and clear and free children.
460a5704e83SNicholas Piggin  * Callers are responsible for flushing the PWC.
461a5704e83SNicholas Piggin  *
462a5704e83SNicholas Piggin  * When page tables are being unmapped/freed as part of page fault path
4633d89c2efSPaul Mackerras  * (full == false), valid ptes are generally not expected; however, there
4643d89c2efSPaul Mackerras  * is one situation where they arise, which is when dirty page logging is
4653d89c2efSPaul Mackerras  * turned off for a memslot while the VM is running.  The new memslot
4663d89c2efSPaul Mackerras  * becomes visible to page faults before the memslot commit function
4673d89c2efSPaul Mackerras  * gets to flush the memslot, which can lead to a 2MB page mapping being
4683d89c2efSPaul Mackerras  * installed for a guest physical address where there are already 64kB
4693d89c2efSPaul Mackerras  * (or 4kB) mappings (of sub-pages of the same 2MB page).
470a5704e83SNicholas Piggin  */
471fd10be25SSuraj Jitindar Singh static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
472fd10be25SSuraj Jitindar Singh 				  unsigned int lpid)
473a5704e83SNicholas Piggin {
474a5704e83SNicholas Piggin 	if (full) {
475afd31356SMichael Ellerman 		memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
476a5704e83SNicholas Piggin 	} else {
477a5704e83SNicholas Piggin 		pte_t *p = pte;
478a5704e83SNicholas Piggin 		unsigned long it;
479a5704e83SNicholas Piggin 
480a5704e83SNicholas Piggin 		for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
481a5704e83SNicholas Piggin 			if (pte_val(*p) == 0)
482a5704e83SNicholas Piggin 				continue;
483a5704e83SNicholas Piggin 			kvmppc_unmap_pte(kvm, p,
484a5704e83SNicholas Piggin 					 pte_pfn(*p) << PAGE_SHIFT,
485fd10be25SSuraj Jitindar Singh 					 PAGE_SHIFT, NULL, lpid);
486a5704e83SNicholas Piggin 		}
487a5704e83SNicholas Piggin 	}
488a5704e83SNicholas Piggin 
489a5704e83SNicholas Piggin 	kvmppc_pte_free(pte);
490a5704e83SNicholas Piggin }
491a5704e83SNicholas Piggin 
492fd10be25SSuraj Jitindar Singh static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
493fd10be25SSuraj Jitindar Singh 				  unsigned int lpid)
494a5704e83SNicholas Piggin {
495a5704e83SNicholas Piggin 	unsigned long im;
496a5704e83SNicholas Piggin 	pmd_t *p = pmd;
497a5704e83SNicholas Piggin 
498a5704e83SNicholas Piggin 	for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
499a5704e83SNicholas Piggin 		if (!pmd_present(*p))
500a5704e83SNicholas Piggin 			continue;
501a5704e83SNicholas Piggin 		if (pmd_is_leaf(*p)) {
502a5704e83SNicholas Piggin 			if (full) {
503a5704e83SNicholas Piggin 				pmd_clear(p);
504a5704e83SNicholas Piggin 			} else {
505a5704e83SNicholas Piggin 				WARN_ON_ONCE(1);
506a5704e83SNicholas Piggin 				kvmppc_unmap_pte(kvm, (pte_t *)p,
507a5704e83SNicholas Piggin 					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
508fd10be25SSuraj Jitindar Singh 					 PMD_SHIFT, NULL, lpid);
509a5704e83SNicholas Piggin 			}
510a5704e83SNicholas Piggin 		} else {
511a5704e83SNicholas Piggin 			pte_t *pte;
512a5704e83SNicholas Piggin 
513d00ae31fSHugh Dickins 			pte = pte_offset_kernel(p, 0);
514fd10be25SSuraj Jitindar Singh 			kvmppc_unmap_free_pte(kvm, pte, full, lpid);
515a5704e83SNicholas Piggin 			pmd_clear(p);
516a5704e83SNicholas Piggin 		}
517a5704e83SNicholas Piggin 	}
518a5704e83SNicholas Piggin 	kvmppc_pmd_free(pmd);
519a5704e83SNicholas Piggin }
520a5704e83SNicholas Piggin 
521fd10be25SSuraj Jitindar Singh static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
522fd10be25SSuraj Jitindar Singh 				  unsigned int lpid)
523a5704e83SNicholas Piggin {
524a5704e83SNicholas Piggin 	unsigned long iu;
525a5704e83SNicholas Piggin 	pud_t *p = pud;
526a5704e83SNicholas Piggin 
527a5704e83SNicholas Piggin 	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
528a5704e83SNicholas Piggin 		if (!pud_present(*p))
529a5704e83SNicholas Piggin 			continue;
530d6eaceddSAneesh Kumar K.V 		if (pud_is_leaf(*p)) {
531a5704e83SNicholas Piggin 			pud_clear(p);
532a5704e83SNicholas Piggin 		} else {
533a5704e83SNicholas Piggin 			pmd_t *pmd;
534a5704e83SNicholas Piggin 
535a5704e83SNicholas Piggin 			pmd = pmd_offset(p, 0);
536fd10be25SSuraj Jitindar Singh 			kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
537a5704e83SNicholas Piggin 			pud_clear(p);
538a5704e83SNicholas Piggin 		}
539a5704e83SNicholas Piggin 	}
540a5704e83SNicholas Piggin 	pud_free(kvm->mm, pud);
541a5704e83SNicholas Piggin }
542a5704e83SNicholas Piggin 
543fd10be25SSuraj Jitindar Singh void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
544a5704e83SNicholas Piggin {
545a5704e83SNicholas Piggin 	unsigned long ig;
546a5704e83SNicholas Piggin 
547a5704e83SNicholas Piggin 	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
5482fb47060SMike Rapoport 		p4d_t *p4d = p4d_offset(pgd, 0);
549a5704e83SNicholas Piggin 		pud_t *pud;
550a5704e83SNicholas Piggin 
5512fb47060SMike Rapoport 		if (!p4d_present(*p4d))
552a5704e83SNicholas Piggin 			continue;
5532fb47060SMike Rapoport 		pud = pud_offset(p4d, 0);
554fd10be25SSuraj Jitindar Singh 		kvmppc_unmap_free_pud(kvm, pud, lpid);
5552fb47060SMike Rapoport 		p4d_clear(p4d);
556a5704e83SNicholas Piggin 	}
557fd10be25SSuraj Jitindar Singh }
558fd10be25SSuraj Jitindar Singh 
559fd10be25SSuraj Jitindar Singh void kvmppc_free_radix(struct kvm *kvm)
560fd10be25SSuraj Jitindar Singh {
561fd10be25SSuraj Jitindar Singh 	if (kvm->arch.pgtable) {
562fd10be25SSuraj Jitindar Singh 		kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
563fd10be25SSuraj Jitindar Singh 					  kvm->arch.lpid);
564a5704e83SNicholas Piggin 		pgd_free(kvm->mm, kvm->arch.pgtable);
565a5704e83SNicholas Piggin 		kvm->arch.pgtable = NULL;
566a5704e83SNicholas Piggin 	}
567fd10be25SSuraj Jitindar Singh }
568a5704e83SNicholas Piggin 
569a5704e83SNicholas Piggin static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
570fd10be25SSuraj Jitindar Singh 					unsigned long gpa, unsigned int lpid)
571a5704e83SNicholas Piggin {
572a5704e83SNicholas Piggin 	pte_t *pte = pte_offset_kernel(pmd, 0);
573a5704e83SNicholas Piggin 
574a5704e83SNicholas Piggin 	/*
575a5704e83SNicholas Piggin 	 * Clearing the pmd entry then flushing the PWC ensures that the pte
576a5704e83SNicholas Piggin 	 * page will no longer be cached by the MMU, so it can be freed without
577a5704e83SNicholas Piggin 	 * flushing the PWC again.
578a5704e83SNicholas Piggin 	 */
579a5704e83SNicholas Piggin 	pmd_clear(pmd);
580fd10be25SSuraj Jitindar Singh 	kvmppc_radix_flush_pwc(kvm, lpid);
581a5704e83SNicholas Piggin 
582fd10be25SSuraj Jitindar Singh 	kvmppc_unmap_free_pte(kvm, pte, false, lpid);
583a5704e83SNicholas Piggin }
584a5704e83SNicholas Piggin 
585a5704e83SNicholas Piggin static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
586fd10be25SSuraj Jitindar Singh 					unsigned long gpa, unsigned int lpid)
587a5704e83SNicholas Piggin {
588a5704e83SNicholas Piggin 	pmd_t *pmd = pmd_offset(pud, 0);
589a5704e83SNicholas Piggin 
590a5704e83SNicholas Piggin 	/*
591a5704e83SNicholas Piggin 	 * Clearing the pud entry then flushing the PWC ensures that the pmd
592a5704e83SNicholas Piggin 	 * page and any child pte pages will no longer be cached by the MMU,
593a5704e83SNicholas Piggin 	 * so can be freed without flushing the PWC again.
594a5704e83SNicholas Piggin 	 */
595a5704e83SNicholas Piggin 	pud_clear(pud);
596fd10be25SSuraj Jitindar Singh 	kvmppc_radix_flush_pwc(kvm, lpid);
597a5704e83SNicholas Piggin 
598fd10be25SSuraj Jitindar Singh 	kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
599a5704e83SNicholas Piggin }
600a5704e83SNicholas Piggin 
601878cf2bbSNicholas Piggin /*
602878cf2bbSNicholas Piggin  * There are a number of bits which may differ between different faults to
603878cf2bbSNicholas Piggin  * the same partition scope entry. RC bits, in the course of cleaning and
604878cf2bbSNicholas Piggin  * aging. And the write bit can change, either the access could have been
605878cf2bbSNicholas Piggin  * upgraded, or a read fault could happen concurrently with a write fault
606878cf2bbSNicholas Piggin  * that sets those bits first.
607878cf2bbSNicholas Piggin  */
608878cf2bbSNicholas Piggin #define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
609878cf2bbSNicholas Piggin 
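/*
 * Insert a leaf PTE into the partition-scoped tree at the requested level:
 * level 0 is a normal page, level 1 a 2MB (PMD-level) leaf and level 2 a
 * 1GB (PUD-level) leaf.  Missing intermediate levels are allocated outside
 * the mmu_lock and the walk is redone under the lock; -EAGAIN asks the
 * caller to retry if an invalidation or a racing insertion got in first.
 */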
610fd10be25SSuraj Jitindar Singh int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
61104bae9d5SSuraj Jitindar Singh 		      unsigned long gpa, unsigned int level,
6128cf531edSSuraj Jitindar Singh 		      unsigned long mmu_seq, unsigned int lpid,
6138cf531edSSuraj Jitindar Singh 		      unsigned long *rmapp, struct rmap_nested **n_rmap)
6145a319350SPaul Mackerras {
6155a319350SPaul Mackerras 	pgd_t *pgd;
6162fb47060SMike Rapoport 	p4d_t *p4d;
6175a319350SPaul Mackerras 	pud_t *pud, *new_pud = NULL;
6185a319350SPaul Mackerras 	pmd_t *pmd, *new_pmd = NULL;
6195a319350SPaul Mackerras 	pte_t *ptep, *new_ptep = NULL;
6205a319350SPaul Mackerras 	int ret;
6215a319350SPaul Mackerras 
6225a319350SPaul Mackerras 	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
62304bae9d5SSuraj Jitindar Singh 	pgd = pgtable + pgd_index(gpa);
6242fb47060SMike Rapoport 	p4d = p4d_offset(pgd, gpa);
6252fb47060SMike Rapoport 
6265a319350SPaul Mackerras 	pud = NULL;
6272fb47060SMike Rapoport 	if (p4d_present(*p4d))
6282fb47060SMike Rapoport 		pud = pud_offset(p4d, gpa);
6295a319350SPaul Mackerras 	else
6305a319350SPaul Mackerras 		new_pud = pud_alloc_one(kvm->mm, gpa);
6315a319350SPaul Mackerras 
6325a319350SPaul Mackerras 	pmd = NULL;
633d6eaceddSAneesh Kumar K.V 	if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
6345a319350SPaul Mackerras 		pmd = pmd_offset(pud, gpa);
63558c5c276SPaul Mackerras 	else if (level <= 1)
63621828c99SAneesh Kumar K.V 		new_pmd = kvmppc_pmd_alloc();
6375a319350SPaul Mackerras 
638c3856aebSPaul Mackerras 	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
6395a319350SPaul Mackerras 		new_ptep = kvmppc_pte_alloc();
6405a319350SPaul Mackerras 
6415a319350SPaul Mackerras 	/* Check if we might have been invalidated; let the guest retry if so */
6425a319350SPaul Mackerras 	spin_lock(&kvm->mmu_lock);
6435a319350SPaul Mackerras 	ret = -EAGAIN;
64420ec3ebdSChao Peng 	if (mmu_invalidate_retry(kvm, mmu_seq))
6455a319350SPaul Mackerras 		goto out_unlock;
6465a319350SPaul Mackerras 
6475a319350SPaul Mackerras 	/* Now traverse again under the lock and change the tree */
6485a319350SPaul Mackerras 	ret = -ENOMEM;
6492fb47060SMike Rapoport 	if (p4d_none(*p4d)) {
6505a319350SPaul Mackerras 		if (!new_pud)
6515a319350SPaul Mackerras 			goto out_unlock;
6522fb47060SMike Rapoport 		p4d_populate(kvm->mm, p4d, new_pud);
6535a319350SPaul Mackerras 		new_pud = NULL;
6545a319350SPaul Mackerras 	}
6552fb47060SMike Rapoport 	pud = pud_offset(p4d, gpa);
656d6eaceddSAneesh Kumar K.V 	if (pud_is_leaf(*pud)) {
65758c5c276SPaul Mackerras 		unsigned long hgpa = gpa & PUD_MASK;
65858c5c276SPaul Mackerras 
659878cf2bbSNicholas Piggin 		/* Check if we raced and someone else has set the same thing */
660878cf2bbSNicholas Piggin 		if (level == 2) {
661878cf2bbSNicholas Piggin 			if (pud_raw(*pud) == pte_raw(pte)) {
662878cf2bbSNicholas Piggin 				ret = 0;
663878cf2bbSNicholas Piggin 				goto out_unlock;
664878cf2bbSNicholas Piggin 			}
665878cf2bbSNicholas Piggin 			/* Valid 1GB page here already, add our extra bits */
666878cf2bbSNicholas Piggin 			WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
667878cf2bbSNicholas Piggin 							PTE_BITS_MUST_MATCH);
668878cf2bbSNicholas Piggin 			kvmppc_radix_update_pte(kvm, (pte_t *)pud,
669878cf2bbSNicholas Piggin 					      0, pte_val(pte), hgpa, PUD_SHIFT);
670878cf2bbSNicholas Piggin 			ret = 0;
671878cf2bbSNicholas Piggin 			goto out_unlock;
672878cf2bbSNicholas Piggin 		}
67358c5c276SPaul Mackerras 		/*
67458c5c276SPaul Mackerras 		 * If we raced with another CPU which has just put
67558c5c276SPaul Mackerras 		 * a 1GB pte in after we saw a pmd page, try again.
67658c5c276SPaul Mackerras 		 */
677878cf2bbSNicholas Piggin 		if (!new_pmd) {
67858c5c276SPaul Mackerras 			ret = -EAGAIN;
67958c5c276SPaul Mackerras 			goto out_unlock;
68058c5c276SPaul Mackerras 		}
68158c5c276SPaul Mackerras 		/* Valid 1GB page here already, remove it */
682fd10be25SSuraj Jitindar Singh 		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
683fd10be25SSuraj Jitindar Singh 				 lpid);
68458c5c276SPaul Mackerras 	}
68558c5c276SPaul Mackerras 	if (level == 2) {
68658c5c276SPaul Mackerras 		if (!pud_none(*pud)) {
68758c5c276SPaul Mackerras 			/*
68858c5c276SPaul Mackerras 			 * There's a page table page here, but we wanted to
68958c5c276SPaul Mackerras 			 * install a large page, so remove and free the page
690a5704e83SNicholas Piggin 			 * table page.
69158c5c276SPaul Mackerras 			 */
692fd10be25SSuraj Jitindar Singh 			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
69358c5c276SPaul Mackerras 		}
69458c5c276SPaul Mackerras 		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
6958cf531edSSuraj Jitindar Singh 		if (rmapp && n_rmap)
6968cf531edSSuraj Jitindar Singh 			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
69758c5c276SPaul Mackerras 		ret = 0;
69858c5c276SPaul Mackerras 		goto out_unlock;
69958c5c276SPaul Mackerras 	}
7005a319350SPaul Mackerras 	if (pud_none(*pud)) {
7015a319350SPaul Mackerras 		if (!new_pmd)
7025a319350SPaul Mackerras 			goto out_unlock;
7035a319350SPaul Mackerras 		pud_populate(kvm->mm, pud, new_pmd);
7045a319350SPaul Mackerras 		new_pmd = NULL;
7055a319350SPaul Mackerras 	}
7065a319350SPaul Mackerras 	pmd = pmd_offset(pud, gpa);
707c3856aebSPaul Mackerras 	if (pmd_is_leaf(*pmd)) {
708c3856aebSPaul Mackerras 		unsigned long lgpa = gpa & PMD_MASK;
709c3856aebSPaul Mackerras 
710878cf2bbSNicholas Piggin 		/* Check if we raced and someone else has set the same thing */
711878cf2bbSNicholas Piggin 		if (level == 1) {
712878cf2bbSNicholas Piggin 			if (pmd_raw(*pmd) == pte_raw(pte)) {
713878cf2bbSNicholas Piggin 				ret = 0;
714878cf2bbSNicholas Piggin 				goto out_unlock;
715878cf2bbSNicholas Piggin 			}
716878cf2bbSNicholas Piggin 			/* Valid 2MB page here already, add our extra bits */
717878cf2bbSNicholas Piggin 			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
718878cf2bbSNicholas Piggin 							PTE_BITS_MUST_MATCH);
719878cf2bbSNicholas Piggin 			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
720878cf2bbSNicholas Piggin 					0, pte_val(pte), lgpa, PMD_SHIFT);
721878cf2bbSNicholas Piggin 			ret = 0;
722878cf2bbSNicholas Piggin 			goto out_unlock;
723878cf2bbSNicholas Piggin 		}
724878cf2bbSNicholas Piggin 
725c3856aebSPaul Mackerras 		/*
726c3856aebSPaul Mackerras 		 * If we raced with another CPU which has just put
727c3856aebSPaul Mackerras 		 * a 2MB pte in after we saw a pte page, try again.
728c3856aebSPaul Mackerras 		 */
729878cf2bbSNicholas Piggin 		if (!new_ptep) {
7305a319350SPaul Mackerras 			ret = -EAGAIN;
7315a319350SPaul Mackerras 			goto out_unlock;
7325a319350SPaul Mackerras 		}
733c3856aebSPaul Mackerras 		/* Valid 2MB page here already, remove it */
734fd10be25SSuraj Jitindar Singh 		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
735fd10be25SSuraj Jitindar Singh 				 lpid);
73658c5c276SPaul Mackerras 	}
73758c5c276SPaul Mackerras 	if (level == 1) {
73858c5c276SPaul Mackerras 		if (!pmd_none(*pmd)) {
7395a319350SPaul Mackerras 			/*
740c4c8a764SPaul Mackerras 			 * There's a page table page here, but we wanted to
741c4c8a764SPaul Mackerras 			 * install a large page, so remove and free the page
742a5704e83SNicholas Piggin 			 * table page.
7435a319350SPaul Mackerras 			 */
744fd10be25SSuraj Jitindar Singh 			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
7455a319350SPaul Mackerras 		}
74658c5c276SPaul Mackerras 		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
7478cf531edSSuraj Jitindar Singh 		if (rmapp && n_rmap)
7488cf531edSSuraj Jitindar Singh 			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
74958c5c276SPaul Mackerras 		ret = 0;
7505a319350SPaul Mackerras 		goto out_unlock;
7515a319350SPaul Mackerras 	}
7525a319350SPaul Mackerras 	if (pmd_none(*pmd)) {
7535a319350SPaul Mackerras 		if (!new_ptep)
7545a319350SPaul Mackerras 			goto out_unlock;
7555a319350SPaul Mackerras 		pmd_populate(kvm->mm, pmd, new_ptep);
7565a319350SPaul Mackerras 		new_ptep = NULL;
7575a319350SPaul Mackerras 	}
7585a319350SPaul Mackerras 	ptep = pte_offset_kernel(pmd, gpa);
7595a319350SPaul Mackerras 	if (pte_present(*ptep)) {
760c4c8a764SPaul Mackerras 		/* Check if someone else set the same thing */
761c4c8a764SPaul Mackerras 		if (pte_raw(*ptep) == pte_raw(pte)) {
762c4c8a764SPaul Mackerras 			ret = 0;
763c4c8a764SPaul Mackerras 			goto out_unlock;
764c4c8a764SPaul Mackerras 		}
765878cf2bbSNicholas Piggin 		/* Valid page here already, add our extra bits */
766878cf2bbSNicholas Piggin 		WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
767878cf2bbSNicholas Piggin 							PTE_BITS_MUST_MATCH);
768878cf2bbSNicholas Piggin 		kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
769878cf2bbSNicholas Piggin 		ret = 0;
770878cf2bbSNicholas Piggin 		goto out_unlock;
7715a319350SPaul Mackerras 	}
7725a319350SPaul Mackerras 	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
7738cf531edSSuraj Jitindar Singh 	if (rmapp && n_rmap)
7748cf531edSSuraj Jitindar Singh 		kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
7755a319350SPaul Mackerras 	ret = 0;
7765a319350SPaul Mackerras 
7775a319350SPaul Mackerras  out_unlock:
7785a319350SPaul Mackerras 	spin_unlock(&kvm->mmu_lock);
7795a319350SPaul Mackerras 	if (new_pud)
7805a319350SPaul Mackerras 		pud_free(kvm->mm, new_pud);
7815a319350SPaul Mackerras 	if (new_pmd)
78221828c99SAneesh Kumar K.V 		kvmppc_pmd_free(new_pmd);
7835a319350SPaul Mackerras 	if (new_ptep)
7845a319350SPaul Mackerras 		kvmppc_pte_free(new_ptep);
7855a319350SPaul Mackerras 	return ret;
7865a319350SPaul Mackerras }
7875a319350SPaul Mackerras 
7886cdf3037SAneesh Kumar K.V bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
789fd10be25SSuraj Jitindar Singh 			     unsigned long gpa, unsigned int lpid)
7905a319350SPaul Mackerras {
7915a319350SPaul Mackerras 	unsigned long pgflags;
79204bae9d5SSuraj Jitindar Singh 	unsigned int shift;
79304bae9d5SSuraj Jitindar Singh 	pte_t *ptep;
7945a319350SPaul Mackerras 
7955a319350SPaul Mackerras 	/*
7965a319350SPaul Mackerras 	 * Need to set an R or C bit in the 2nd-level tables;
797f7caf712SPaul Mackerras 	 * since we are just helping out the hardware here,
798f7caf712SPaul Mackerras 	 * it is sufficient to do what the hardware does.
7995a319350SPaul Mackerras 	 */
8005a319350SPaul Mackerras 	pgflags = _PAGE_ACCESSED;
8015a319350SPaul Mackerras 	if (writing)
8025a319350SPaul Mackerras 		pgflags |= _PAGE_DIRTY;
8036cdf3037SAneesh Kumar K.V 
8046cdf3037SAneesh Kumar K.V 	if (nested)
8056cdf3037SAneesh Kumar K.V 		ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
8066cdf3037SAneesh Kumar K.V 	else
8076cdf3037SAneesh Kumar K.V 		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
8086cdf3037SAneesh Kumar K.V 
80904bae9d5SSuraj Jitindar Singh 	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
81004bae9d5SSuraj Jitindar Singh 		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
81104bae9d5SSuraj Jitindar Singh 		return true;
8125a319350SPaul Mackerras 	}
81304bae9d5SSuraj Jitindar Singh 	return false;
8145a319350SPaul Mackerras }
8155a319350SPaul Mackerras 
816fd10be25SSuraj Jitindar Singh int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
81704bae9d5SSuraj Jitindar Singh 				   unsigned long gpa,
81804bae9d5SSuraj Jitindar Singh 				   struct kvm_memory_slot *memslot,
81904bae9d5SSuraj Jitindar Singh 				   bool writing, bool kvm_ro,
82004bae9d5SSuraj Jitindar Singh 				   pte_t *inserted_pte, unsigned int *levelp)
82104bae9d5SSuraj Jitindar Singh {
82204bae9d5SSuraj Jitindar Singh 	struct kvm *kvm = vcpu->kvm;
82304bae9d5SSuraj Jitindar Singh 	struct page *page = NULL;
82404bae9d5SSuraj Jitindar Singh 	unsigned long mmu_seq;
82504bae9d5SSuraj Jitindar Singh 	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
82604bae9d5SSuraj Jitindar Singh 	bool upgrade_write = false;
82704bae9d5SSuraj Jitindar Singh 	bool *upgrade_p = &upgrade_write;
82804bae9d5SSuraj Jitindar Singh 	pte_t pte, *ptep;
82904bae9d5SSuraj Jitindar Singh 	unsigned int shift, level;
83004bae9d5SSuraj Jitindar Singh 	int ret;
831f460f679SPaul Mackerras 	bool large_enable;
83204bae9d5SSuraj Jitindar Singh 
83331c8b0d0SPaul Mackerras 	/* used to check for invalidations in progress */
83420ec3ebdSChao Peng 	mmu_seq = kvm->mmu_invalidate_seq;
83531c8b0d0SPaul Mackerras 	smp_rmb();
83631c8b0d0SPaul Mackerras 
83731c8b0d0SPaul Mackerras 	/*
83831c8b0d0SPaul Mackerras 	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
83931c8b0d0SPaul Mackerras 	 * do it with !atomic && !async, which is how we call it.
84031c8b0d0SPaul Mackerras 	 * We always ask for write permission since the common case
84131c8b0d0SPaul Mackerras 	 * is that the page is writable.
84231c8b0d0SPaul Mackerras 	 */
84331c8b0d0SPaul Mackerras 	hva = gfn_to_hva_memslot(memslot, gfn);
844dadbb612SSouptick Joarder 	if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
84531c8b0d0SPaul Mackerras 		upgrade_write = true;
84631c8b0d0SPaul Mackerras 	} else {
84771d29f43SNicholas Piggin 		unsigned long pfn;
84871d29f43SNicholas Piggin 
84931c8b0d0SPaul Mackerras 		/* Call KVM generic code to do the slow-path check */
850c8b88b33SPeter Xu 		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
8514a42d848SDavid Stevens 					   writing, upgrade_p, NULL);
85231c8b0d0SPaul Mackerras 		if (is_error_noslot_pfn(pfn))
85331c8b0d0SPaul Mackerras 			return -EFAULT;
85431c8b0d0SPaul Mackerras 		page = NULL;
85531c8b0d0SPaul Mackerras 		if (pfn_valid(pfn)) {
85631c8b0d0SPaul Mackerras 			page = pfn_to_page(pfn);
85731c8b0d0SPaul Mackerras 			if (PageReserved(page))
85831c8b0d0SPaul Mackerras 				page = NULL;
8595a319350SPaul Mackerras 		}
8605a319350SPaul Mackerras 	}
8615a319350SPaul Mackerras 
86231c8b0d0SPaul Mackerras 	/*
86331c8b0d0SPaul Mackerras 	 * Read the PTE from the process' radix tree and use that
86471d29f43SNicholas Piggin 	 * so we get the shift and attribute bits.
86531c8b0d0SPaul Mackerras 	 */
866bda3deaaSAneesh Kumar K.V 	spin_lock(&kvm->mmu_lock);
867bda3deaaSAneesh Kumar K.V 	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
868ae49dedaSPaul Mackerras 	pte = __pte(0);
869ae49dedaSPaul Mackerras 	if (ptep)
870bda3deaaSAneesh Kumar K.V 		pte = READ_ONCE(*ptep);
871bda3deaaSAneesh Kumar K.V 	spin_unlock(&kvm->mmu_lock);
8726579804cSPaul Mackerras 	/*
8736579804cSPaul Mackerras 	 * If the PTE disappeared temporarily due to a THP
8746579804cSPaul Mackerras 	 * collapse, just return and let the guest try again.
8756579804cSPaul Mackerras 	 */
876ae49dedaSPaul Mackerras 	if (!pte_present(pte)) {
8776579804cSPaul Mackerras 		if (page)
8786579804cSPaul Mackerras 			put_page(page);
8796579804cSPaul Mackerras 		return RESUME_GUEST;
8806579804cSPaul Mackerras 	}
88171d29f43SNicholas Piggin 
882f460f679SPaul Mackerras 	/* If we're logging dirty pages, always map single pages */
883f460f679SPaul Mackerras 	large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
884f460f679SPaul Mackerras 
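	/*
	 * A large mapping can only be used when the guest physical address
	 * and the host virtual address are congruent modulo the large page
	 * size, i.e. they point at the same offset within a 1GB or 2MB
	 * region, so that the host's large page lines up with the guest's.
	 */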
88571d29f43SNicholas Piggin 	/* Get pte level from shift/size */
886f460f679SPaul Mackerras 	if (large_enable && shift == PUD_SHIFT &&
88731c8b0d0SPaul Mackerras 	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
88831c8b0d0SPaul Mackerras 	    (hva & (PUD_SIZE - PAGE_SIZE))) {
88931c8b0d0SPaul Mackerras 		level = 2;
890f460f679SPaul Mackerras 	} else if (large_enable && shift == PMD_SHIFT &&
89131c8b0d0SPaul Mackerras 		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
89231c8b0d0SPaul Mackerras 		   (hva & (PMD_SIZE - PAGE_SIZE))) {
89331c8b0d0SPaul Mackerras 		level = 1;
89471d29f43SNicholas Piggin 	} else {
89571d29f43SNicholas Piggin 		level = 0;
89671d29f43SNicholas Piggin 		if (shift > PAGE_SHIFT) {
89771d29f43SNicholas Piggin 			/*
89871d29f43SNicholas Piggin 			 * If the pte maps more than one page, bring over
89971d29f43SNicholas Piggin 			 * bits from the virtual address to get the real
90071d29f43SNicholas Piggin 			 * address of the specific single page we want.
90171d29f43SNicholas Piggin 			 */
90271d29f43SNicholas Piggin 			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
90371d29f43SNicholas Piggin 			pte = __pte(pte_val(pte) | (hva & rpnmask));
90431c8b0d0SPaul Mackerras 		}
90571d29f43SNicholas Piggin 	}
90671d29f43SNicholas Piggin 
907bc64dd0eSNicholas Piggin 	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
908bc64dd0eSNicholas Piggin 	if (writing || upgrade_write) {
909bc64dd0eSNicholas Piggin 		if (pte_val(pte) & _PAGE_WRITE)
910bc64dd0eSNicholas Piggin 			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
911bc64dd0eSNicholas Piggin 	} else {
912bc64dd0eSNicholas Piggin 		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
913bc64dd0eSNicholas Piggin 	}
9145a319350SPaul Mackerras 
9155a319350SPaul Mackerras 	/* Allocate space in the tree and write the PTE */
91604bae9d5SSuraj Jitindar Singh 	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
9178cf531edSSuraj Jitindar Singh 				mmu_seq, kvm->arch.lpid, NULL, NULL);
91804bae9d5SSuraj Jitindar Singh 	if (inserted_pte)
91904bae9d5SSuraj Jitindar Singh 		*inserted_pte = pte;
92004bae9d5SSuraj Jitindar Singh 	if (levelp)
92104bae9d5SSuraj Jitindar Singh 		*levelp = level;
9225a319350SPaul Mackerras 
9235a319350SPaul Mackerras 	if (page) {
92431c8b0d0SPaul Mackerras 		if (!ret && (pte_val(pte) & _PAGE_WRITE))
925c3856aebSPaul Mackerras 			set_page_dirty_lock(page);
926c3856aebSPaul Mackerras 		put_page(page);
9275a319350SPaul Mackerras 	}
928c3856aebSPaul Mackerras 
9298f1f7b9bSSuraj Jitindar Singh 	/* Increment number of large pages if we (successfully) inserted one */
9308f1f7b9bSSuraj Jitindar Singh 	if (!ret) {
9318f1f7b9bSSuraj Jitindar Singh 		if (level == 1)
9328f1f7b9bSSuraj Jitindar Singh 			kvm->stat.num_2M_pages++;
9338f1f7b9bSSuraj Jitindar Singh 		else if (level == 2)
9348f1f7b9bSSuraj Jitindar Singh 			kvm->stat.num_1G_pages++;
9358f1f7b9bSSuraj Jitindar Singh 	}
9368f1f7b9bSSuraj Jitindar Singh 
93704bae9d5SSuraj Jitindar Singh 	return ret;
93804bae9d5SSuraj Jitindar Singh }
93904bae9d5SSuraj Jitindar Singh 
9408c99d345STianjia Zhang int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
94104bae9d5SSuraj Jitindar Singh 				   unsigned long ea, unsigned long dsisr)
94204bae9d5SSuraj Jitindar Singh {
94304bae9d5SSuraj Jitindar Singh 	struct kvm *kvm = vcpu->kvm;
94404bae9d5SSuraj Jitindar Singh 	unsigned long gpa, gfn;
94504bae9d5SSuraj Jitindar Singh 	struct kvm_memory_slot *memslot;
94604bae9d5SSuraj Jitindar Singh 	long ret;
94704bae9d5SSuraj Jitindar Singh 	bool writing = !!(dsisr & DSISR_ISSTORE);
94804bae9d5SSuraj Jitindar Singh 	bool kvm_ro = false;
94904bae9d5SSuraj Jitindar Singh 
95004bae9d5SSuraj Jitindar Singh 	/* Check for unusual errors */
95104bae9d5SSuraj Jitindar Singh 	if (dsisr & DSISR_UNSUPP_MMU) {
95204bae9d5SSuraj Jitindar Singh 		pr_err("KVM: Got unsupported MMU fault\n");
95304bae9d5SSuraj Jitindar Singh 		return -EFAULT;
95404bae9d5SSuraj Jitindar Singh 	}
95504bae9d5SSuraj Jitindar Singh 	if (dsisr & DSISR_BADACCESS) {
95604bae9d5SSuraj Jitindar Singh 		/* Reflect to the guest as DSI */
95704bae9d5SSuraj Jitindar Singh 		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
9586cd5c1dbSNicholas Piggin 		kvmppc_core_queue_data_storage(vcpu,
9596cd5c1dbSNicholas Piggin 				kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
9606cd5c1dbSNicholas Piggin 				ea, dsisr);
96104bae9d5SSuraj Jitindar Singh 		return RESUME_GUEST;
96204bae9d5SSuraj Jitindar Singh 	}
96304bae9d5SSuraj Jitindar Singh 
96404bae9d5SSuraj Jitindar Singh 	/* Translate the logical address */
96504bae9d5SSuraj Jitindar Singh 	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
96604bae9d5SSuraj Jitindar Singh 	gpa &= ~0xF000000000000000ul;
96704bae9d5SSuraj Jitindar Singh 	gfn = gpa >> PAGE_SHIFT;
96804bae9d5SSuraj Jitindar Singh 	if (!(dsisr & DSISR_PRTABLE_FAULT))
96904bae9d5SSuraj Jitindar Singh 		gpa |= ea & 0xfff;
97004bae9d5SSuraj Jitindar Singh 
971008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
972008e359cSBharata B Rao 		return kvmppc_send_page_to_uv(kvm, gfn);
973008e359cSBharata B Rao 
97404bae9d5SSuraj Jitindar Singh 	/* Get the corresponding memslot */
97504bae9d5SSuraj Jitindar Singh 	memslot = gfn_to_memslot(kvm, gfn);
97604bae9d5SSuraj Jitindar Singh 
97704bae9d5SSuraj Jitindar Singh 	/* No memslot means it's an emulated MMIO region */
97804bae9d5SSuraj Jitindar Singh 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
97904bae9d5SSuraj Jitindar Singh 		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
98004bae9d5SSuraj Jitindar Singh 			     DSISR_SET_RC)) {
98104bae9d5SSuraj Jitindar Singh 			/*
98204bae9d5SSuraj Jitindar Singh 			 * Bad address in guest page table tree, or other
98304bae9d5SSuraj Jitindar Singh 			 * unusual error - reflect it to the guest as DSI.
98404bae9d5SSuraj Jitindar Singh 			 */
9856cd5c1dbSNicholas Piggin 			kvmppc_core_queue_data_storage(vcpu,
9866cd5c1dbSNicholas Piggin 					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
9876cd5c1dbSNicholas Piggin 					ea, dsisr);
98804bae9d5SSuraj Jitindar Singh 			return RESUME_GUEST;
98904bae9d5SSuraj Jitindar Singh 		}
9908c99d345STianjia Zhang 		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
99104bae9d5SSuraj Jitindar Singh 	}
99204bae9d5SSuraj Jitindar Singh 
99304bae9d5SSuraj Jitindar Singh 	if (memslot->flags & KVM_MEM_READONLY) {
99404bae9d5SSuraj Jitindar Singh 		if (writing) {
99504bae9d5SSuraj Jitindar Singh 			/* give the guest a DSI */
9966cd5c1dbSNicholas Piggin 			kvmppc_core_queue_data_storage(vcpu,
9976cd5c1dbSNicholas Piggin 					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
9986cd5c1dbSNicholas Piggin 					ea, DSISR_ISSTORE | DSISR_PROTFAULT);
99904bae9d5SSuraj Jitindar Singh 			return RESUME_GUEST;
100004bae9d5SSuraj Jitindar Singh 		}
100104bae9d5SSuraj Jitindar Singh 		kvm_ro = true;
100204bae9d5SSuraj Jitindar Singh 	}
100304bae9d5SSuraj Jitindar Singh 
100404bae9d5SSuraj Jitindar Singh 	/* Failed to set the reference/change bits */
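	/*
	 * Try to set R/C in the partition-scoped PTE; if that was the only
	 * reason for the fault, we can go straight back to the guest.
	 */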
100504bae9d5SSuraj Jitindar Singh 	if (dsisr & DSISR_SET_RC) {
100604bae9d5SSuraj Jitindar Singh 		spin_lock(&kvm->mmu_lock);
10076cdf3037SAneesh Kumar K.V 		if (kvmppc_hv_handle_set_rc(kvm, false, writing,
10086cdf3037SAneesh Kumar K.V 					    gpa, kvm->arch.lpid))
100904bae9d5SSuraj Jitindar Singh 			dsisr &= ~DSISR_SET_RC;
101004bae9d5SSuraj Jitindar Singh 		spin_unlock(&kvm->mmu_lock);
101104bae9d5SSuraj Jitindar Singh 
101204bae9d5SSuraj Jitindar Singh 		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
101304bae9d5SSuraj Jitindar Singh 			       DSISR_PROTFAULT | DSISR_SET_RC)))
101404bae9d5SSuraj Jitindar Singh 			return RESUME_GUEST;
101504bae9d5SSuraj Jitindar Singh 	}
101604bae9d5SSuraj Jitindar Singh 
101704bae9d5SSuraj Jitindar Singh 	/* Try to insert a pte */
101804bae9d5SSuraj Jitindar Singh 	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
101904bae9d5SSuraj Jitindar Singh 					     kvm_ro, NULL, NULL);
102004bae9d5SSuraj Jitindar Singh 
1021c3856aebSPaul Mackerras 	if (ret == 0 || ret == -EAGAIN)
1022c3856aebSPaul Mackerras 		ret = RESUME_GUEST;
10235a319350SPaul Mackerras 	return ret;
10245a319350SPaul Mackerras }
10255a319350SPaul Mackerras 
1026c43c3a86SPaul Mackerras /* Called with kvm->mmu_lock held */
102732b48bf8SNicholas Piggin void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
102801756099SPaul Mackerras 		     unsigned long gfn)
102901756099SPaul Mackerras {
103001756099SPaul Mackerras 	pte_t *ptep;
103101756099SPaul Mackerras 	unsigned long gpa = gfn << PAGE_SHIFT;
103201756099SPaul Mackerras 	unsigned int shift;
103301756099SPaul Mackerras 
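	/* For a secure guest, just ask the ultravisor to invalidate the page */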
1034008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
1035008e359cSBharata B Rao 		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
103632b48bf8SNicholas Piggin 		return;
1037008e359cSBharata B Rao 	}
1038008e359cSBharata B Rao 
10394b99412eSAneesh Kumar K.V 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
1040f0f825f0SPaul Mackerras 	if (ptep && pte_present(*ptep))
1041fd10be25SSuraj Jitindar Singh 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
1042fd10be25SSuraj Jitindar Singh 				 kvm->arch.lpid);
104301756099SPaul Mackerras }
104401756099SPaul Mackerras 
1045c43c3a86SPaul Mackerras /* Called with kvm->mmu_lock held */
1046b1c5356eSSean Christopherson bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
104701756099SPaul Mackerras 		   unsigned long gfn)
104801756099SPaul Mackerras {
104901756099SPaul Mackerras 	pte_t *ptep;
105001756099SPaul Mackerras 	unsigned long gpa = gfn << PAGE_SHIFT;
105101756099SPaul Mackerras 	unsigned int shift;
1052b1c5356eSSean Christopherson 	bool ref = false;
1053ae59a7e1SSuraj Jitindar Singh 	unsigned long old, *rmapp;
105401756099SPaul Mackerras 
1055008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
1056008e359cSBharata B Rao 		return ref;
1057008e359cSBharata B Rao 
10584b99412eSAneesh Kumar K.V 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
105901756099SPaul Mackerras 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
1060ae59a7e1SSuraj Jitindar Singh 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
106101756099SPaul Mackerras 					      gpa, shift);
106201756099SPaul Mackerras 		/* XXX need to flush tlb here? */
1063ae59a7e1SSuraj Jitindar Singh 		/* Also clear bit in ptes in shadow pgtable for nested guests */
1064ae59a7e1SSuraj Jitindar Singh 		rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
1065ae59a7e1SSuraj Jitindar Singh 		kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
1066ae59a7e1SSuraj Jitindar Singh 					       old & PTE_RPN_MASK,
1067ae59a7e1SSuraj Jitindar Singh 					       1UL << shift);
1068b1c5356eSSean Christopherson 		ref = true;
106901756099SPaul Mackerras 	}
107001756099SPaul Mackerras 	return ref;
107101756099SPaul Mackerras }
107201756099SPaul Mackerras 
1073c43c3a86SPaul Mackerras /* Called with kvm->mmu_lock held */
1074b1c5356eSSean Christopherson bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
107501756099SPaul Mackerras 			unsigned long gfn)
107701756099SPaul Mackerras {
107801756099SPaul Mackerras 	pte_t *ptep;
107901756099SPaul Mackerras 	unsigned long gpa = gfn << PAGE_SHIFT;
108001756099SPaul Mackerras 	unsigned int shift;
1081b1c5356eSSean Christopherson 	bool ref = false;
108201756099SPaul Mackerras 
1083008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
1084008e359cSBharata B Rao 		return ref;
1085008e359cSBharata B Rao 
10864b99412eSAneesh Kumar K.V 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
108701756099SPaul Mackerras 	if (ptep && pte_present(*ptep) && pte_young(*ptep))
1088b1c5356eSSean Christopherson 		ref = true;
108901756099SPaul Mackerras 	return ref;
109001756099SPaul Mackerras }
109101756099SPaul Mackerras 
10928f7b79b8SPaul Mackerras /* Returns the number of PAGE_SIZE pages that are dirty */
10938f7b79b8SPaul Mackerras static int kvm_radix_test_clear_dirty(struct kvm *kvm,
10948f7b79b8SPaul Mackerras 				struct kvm_memory_slot *memslot, int pagenum)
10958f7b79b8SPaul Mackerras {
10968f7b79b8SPaul Mackerras 	unsigned long gfn = memslot->base_gfn + pagenum;
10978f7b79b8SPaul Mackerras 	unsigned long gpa = gfn << PAGE_SHIFT;
1098bf8036a4SAneesh Kumar K.V 	pte_t *ptep, pte;
10998f7b79b8SPaul Mackerras 	unsigned int shift;
11008f7b79b8SPaul Mackerras 	int ret = 0;
1101ae59a7e1SSuraj Jitindar Singh 	unsigned long old, *rmapp;
11028f7b79b8SPaul Mackerras 
1103008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
1104008e359cSBharata B Rao 		return ret;
1105008e359cSBharata B Rao 
1106bf8036a4SAneesh Kumar K.V 	/*
1107bf8036a4SAneesh Kumar K.V 	 * For performance reasons we don't hold kvm->mmu_lock while walking the
1108bf8036a4SAneesh Kumar K.V 	 * partition scoped table.
1109bf8036a4SAneesh Kumar K.V 	 */
1110bf8036a4SAneesh Kumar K.V 	ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
1111bf8036a4SAneesh Kumar K.V 	if (!ptep)
1112bf8036a4SAneesh Kumar K.V 		return 0;
1113bf8036a4SAneesh Kumar K.V 
1114bf8036a4SAneesh Kumar K.V 	pte = READ_ONCE(*ptep);
1115bf8036a4SAneesh Kumar K.V 	if (pte_present(pte) && pte_dirty(pte)) {
1116ae59a7e1SSuraj Jitindar Singh 		spin_lock(&kvm->mmu_lock);
1117bf8036a4SAneesh Kumar K.V 		/*
1118bf8036a4SAneesh Kumar K.V 		 * Recheck the pte now that we hold kvm->mmu_lock
1119bf8036a4SAneesh Kumar K.V 		 */
1120bf8036a4SAneesh Kumar K.V 		if (pte_val(pte) != pte_val(*ptep)) {
1121bf8036a4SAneesh Kumar K.V 			/*
1122bf8036a4SAneesh Kumar K.V 			 * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
1123bf8036a4SAneesh Kumar K.V 			 * only find PAGE_SIZE pte entries here. We can continue
1124bf8036a4SAneesh Kumar K.V 			 * to use the pte addr returned by above page table
1125bf8036a4SAneesh Kumar K.V 			 * walk.
1126bf8036a4SAneesh Kumar K.V 			 */
1127bf8036a4SAneesh Kumar K.V 			if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
1128bf8036a4SAneesh Kumar K.V 				spin_unlock(&kvm->mmu_lock);
1129bf8036a4SAneesh Kumar K.V 				return 0;
1130bf8036a4SAneesh Kumar K.V 			}
1131bf8036a4SAneesh Kumar K.V 		}
1132bf8036a4SAneesh Kumar K.V 
1133bf8036a4SAneesh Kumar K.V 		ret = 1;
1134bf8036a4SAneesh Kumar K.V 		VM_BUG_ON(shift);
1135ae59a7e1SSuraj Jitindar Singh 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
11368f7b79b8SPaul Mackerras 					      gpa, shift);
1137fd10be25SSuraj Jitindar Singh 		kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
1138ae59a7e1SSuraj Jitindar Singh 		/* Also clear bit in ptes in shadow pgtable for nested guests */
1139ae59a7e1SSuraj Jitindar Singh 		rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
1140ae59a7e1SSuraj Jitindar Singh 		kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
1141ae59a7e1SSuraj Jitindar Singh 					       old & PTE_RPN_MASK,
1142ae59a7e1SSuraj Jitindar Singh 					       1UL << shift);
1143ae59a7e1SSuraj Jitindar Singh 		spin_unlock(&kvm->mmu_lock);
11448f7b79b8SPaul Mackerras 	}
11458f7b79b8SPaul Mackerras 	return ret;
11468f7b79b8SPaul Mackerras }
11478f7b79b8SPaul Mackerras 
11488f7b79b8SPaul Mackerras long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
11498f7b79b8SPaul Mackerras 			struct kvm_memory_slot *memslot, unsigned long *map)
11508f7b79b8SPaul Mackerras {
11518f7b79b8SPaul Mackerras 	unsigned long i, j;
11528f7b79b8SPaul Mackerras 	int npages;
11538f7b79b8SPaul Mackerras 
11548f7b79b8SPaul Mackerras 	for (i = 0; i < memslot->npages; i = j) {
11558f7b79b8SPaul Mackerras 		npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
11568f7b79b8SPaul Mackerras 
11578f7b79b8SPaul Mackerras 		/*
11588f7b79b8SPaul Mackerras 		 * Note that if npages > 0 then i must be a multiple of npages,
11598f7b79b8SPaul Mackerras 		 * since huge pages are only used to back the guest at guest
11608f7b79b8SPaul Mackerras 		 * real addresses that are a multiple of their size.
11618f7b79b8SPaul Mackerras 		 * Since we have at most one PTE covering any given guest
11628f7b79b8SPaul Mackerras 		 * real address, if npages > 1 we can skip to i + npages.
11638f7b79b8SPaul Mackerras 		 */
11648f7b79b8SPaul Mackerras 		j = i + 1;
1165e641a317SPaul Mackerras 		if (npages) {
1166e641a317SPaul Mackerras 			set_dirty_bits(map, i, npages);
1167117647ffSPaul Mackerras 			j = i + npages;
1168e641a317SPaul Mackerras 		}
11698f7b79b8SPaul Mackerras 	}
11708f7b79b8SPaul Mackerras 	return 0;
11718f7b79b8SPaul Mackerras }
11728f7b79b8SPaul Mackerras 
11735af3e9d0SPaul Mackerras void kvmppc_radix_flush_memslot(struct kvm *kvm,
11745af3e9d0SPaul Mackerras 				const struct kvm_memory_slot *memslot)
11755af3e9d0SPaul Mackerras {
11765af3e9d0SPaul Mackerras 	unsigned long n;
11775af3e9d0SPaul Mackerras 	pte_t *ptep;
11785af3e9d0SPaul Mackerras 	unsigned long gpa;
11795af3e9d0SPaul Mackerras 	unsigned int shift;
11805af3e9d0SPaul Mackerras 
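	/*
	 * If the guest has started the transition to secure mode, drop any
	 * ultravisor-managed pages backing this memslot before unmapping it.
	 */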
1181c3262257SBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
1182ce477a7aSSukadev Bhattiprolu 		kvmppc_uvmem_drop_pages(memslot, kvm, true);
1183c3262257SBharata B Rao 
1184008e359cSBharata B Rao 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
1185008e359cSBharata B Rao 		return;
1186008e359cSBharata B Rao 
11875af3e9d0SPaul Mackerras 	gpa = memslot->base_gfn << PAGE_SHIFT;
11885af3e9d0SPaul Mackerras 	spin_lock(&kvm->mmu_lock);
11895af3e9d0SPaul Mackerras 	for (n = memslot->npages; n; --n) {
11904b99412eSAneesh Kumar K.V 		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
11915af3e9d0SPaul Mackerras 		if (ptep && pte_present(*ptep))
11925af3e9d0SPaul Mackerras 			kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
11935af3e9d0SPaul Mackerras 					 kvm->arch.lpid);
11945af3e9d0SPaul Mackerras 		gpa += PAGE_SIZE;
11955af3e9d0SPaul Mackerras 	}
119611362b1bSPaul Mackerras 	/*
119711362b1bSPaul Mackerras 	 * Increase the mmu notifier sequence number to prevent any page
119811362b1bSPaul Mackerras 	 * fault that read the memslot earlier from writing a PTE.
119911362b1bSPaul Mackerras 	 */
120020ec3ebdSChao Peng 	kvm->mmu_invalidate_seq++;
12015af3e9d0SPaul Mackerras 	spin_unlock(&kvm->mmu_lock);
12025af3e9d0SPaul Mackerras }
12035af3e9d0SPaul Mackerras 
12048cf4ecc0SPaul Mackerras static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
12058cf4ecc0SPaul Mackerras 				 int psize, int *indexp)
12068cf4ecc0SPaul Mackerras {
12078cf4ecc0SPaul Mackerras 	if (!mmu_psize_defs[psize].shift)
12088cf4ecc0SPaul Mackerras 		return;
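	/* log2 of the page size goes in the low bits, the AP encoding in bits 31:29 */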
12098cf4ecc0SPaul Mackerras 	info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
12108cf4ecc0SPaul Mackerras 		(mmu_psize_defs[psize].ap << 29);
12118cf4ecc0SPaul Mackerras 	++(*indexp);
12128cf4ecc0SPaul Mackerras }
12138cf4ecc0SPaul Mackerras 
12148cf4ecc0SPaul Mackerras int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
12158cf4ecc0SPaul Mackerras {
12168cf4ecc0SPaul Mackerras 	int i;
12178cf4ecc0SPaul Mackerras 
12188cf4ecc0SPaul Mackerras 	if (!radix_enabled())
12198cf4ecc0SPaul Mackerras 		return -EINVAL;
12208cf4ecc0SPaul Mackerras 	memset(info, 0, sizeof(*info));
12218cf4ecc0SPaul Mackerras 
12228cf4ecc0SPaul Mackerras 	/* 4k page size */
12238cf4ecc0SPaul Mackerras 	info->geometries[0].page_shift = 12;
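	/* The 5-bit option for the lowest level applies only to 64k pages, so use 9 bits here */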
12248cf4ecc0SPaul Mackerras 	info->geometries[0].level_bits[0] = 9;
12258cf4ecc0SPaul Mackerras 	for (i = 1; i < 4; ++i)
12268cf4ecc0SPaul Mackerras 		info->geometries[0].level_bits[i] = p9_supported_radix_bits[i];
12278cf4ecc0SPaul Mackerras 	/* 64k page size */
12288cf4ecc0SPaul Mackerras 	info->geometries[1].page_shift = 16;
12298cf4ecc0SPaul Mackerras 	for (i = 0; i < 4; ++i)
12308cf4ecc0SPaul Mackerras 		info->geometries[1].level_bits[i] = p9_supported_radix_bits[i];
12318cf4ecc0SPaul Mackerras 
12328cf4ecc0SPaul Mackerras 	i = 0;
12338cf4ecc0SPaul Mackerras 	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i);
12348cf4ecc0SPaul Mackerras 	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i);
12358cf4ecc0SPaul Mackerras 	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i);
12368cf4ecc0SPaul Mackerras 	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i);
12378cf4ecc0SPaul Mackerras 
12388cf4ecc0SPaul Mackerras 	return 0;
12398cf4ecc0SPaul Mackerras }
12408cf4ecc0SPaul Mackerras 
12418cf4ecc0SPaul Mackerras int kvmppc_init_vm_radix(struct kvm *kvm)
12428cf4ecc0SPaul Mackerras {
12438cf4ecc0SPaul Mackerras 	kvm->arch.pgtable = pgd_alloc(kvm->mm);
12448cf4ecc0SPaul Mackerras 	if (!kvm->arch.pgtable)
12458cf4ecc0SPaul Mackerras 		return -ENOMEM;
12468cf4ecc0SPaul Mackerras 	return 0;
12478cf4ecc0SPaul Mackerras }
12488cf4ecc0SPaul Mackerras 
12495a319350SPaul Mackerras static void pte_ctor(void *addr)
12505a319350SPaul Mackerras {
125121828c99SAneesh Kumar K.V 	memset(addr, 0, RADIX_PTE_TABLE_SIZE);
125221828c99SAneesh Kumar K.V }
125321828c99SAneesh Kumar K.V 
125421828c99SAneesh Kumar K.V static void pmd_ctor(void *addr)
125521828c99SAneesh Kumar K.V {
125621828c99SAneesh Kumar K.V 	memset(addr, 0, RADIX_PMD_TABLE_SIZE);
12575a319350SPaul Mackerras }
12585a319350SPaul Mackerras 
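/*
 * Per-open state for the "radix" debugfs file: the current position in the
 * dump (lpid and guest physical address) and a buffer holding any output
 * not yet copied to userspace.
 */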
12599a94d3eeSPaul Mackerras struct debugfs_radix_state {
12609a94d3eeSPaul Mackerras 	struct kvm	*kvm;
12619a94d3eeSPaul Mackerras 	struct mutex	mutex;
12629a94d3eeSPaul Mackerras 	unsigned long	gpa;
126383a05510SPaul Mackerras 	int		lpid;
12649a94d3eeSPaul Mackerras 	int		chars_left;
12659a94d3eeSPaul Mackerras 	int		buf_index;
12669a94d3eeSPaul Mackerras 	char		buf[128];
12679a94d3eeSPaul Mackerras 	u8		hdr;
12689a94d3eeSPaul Mackerras };
12699a94d3eeSPaul Mackerras 
12709a94d3eeSPaul Mackerras static int debugfs_radix_open(struct inode *inode, struct file *file)
12719a94d3eeSPaul Mackerras {
12729a94d3eeSPaul Mackerras 	struct kvm *kvm = inode->i_private;
12739a94d3eeSPaul Mackerras 	struct debugfs_radix_state *p;
12749a94d3eeSPaul Mackerras 
12759a94d3eeSPaul Mackerras 	p = kzalloc(sizeof(*p), GFP_KERNEL);
12769a94d3eeSPaul Mackerras 	if (!p)
12779a94d3eeSPaul Mackerras 		return -ENOMEM;
12789a94d3eeSPaul Mackerras 
12799a94d3eeSPaul Mackerras 	kvm_get_kvm(kvm);
12809a94d3eeSPaul Mackerras 	p->kvm = kvm;
12819a94d3eeSPaul Mackerras 	mutex_init(&p->mutex);
12829a94d3eeSPaul Mackerras 	file->private_data = p;
12839a94d3eeSPaul Mackerras 
12849a94d3eeSPaul Mackerras 	return nonseekable_open(inode, file);
12859a94d3eeSPaul Mackerras }
12869a94d3eeSPaul Mackerras 
12879a94d3eeSPaul Mackerras static int debugfs_radix_release(struct inode *inode, struct file *file)
12889a94d3eeSPaul Mackerras {
12899a94d3eeSPaul Mackerras 	struct debugfs_radix_state *p = file->private_data;
12909a94d3eeSPaul Mackerras 
12919a94d3eeSPaul Mackerras 	kvm_put_kvm(p->kvm);
12929a94d3eeSPaul Mackerras 	kfree(p);
12939a94d3eeSPaul Mackerras 	return 0;
12949a94d3eeSPaul Mackerras }
12959a94d3eeSPaul Mackerras 
12969a94d3eeSPaul Mackerras static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
12979a94d3eeSPaul Mackerras 				 size_t len, loff_t *ppos)
12989a94d3eeSPaul Mackerras {
12999a94d3eeSPaul Mackerras 	struct debugfs_radix_state *p = file->private_data;
13009a94d3eeSPaul Mackerras 	ssize_t ret, r;
13019a94d3eeSPaul Mackerras 	unsigned long n;
13029a94d3eeSPaul Mackerras 	struct kvm *kvm;
13039a94d3eeSPaul Mackerras 	unsigned long gpa;
13049a94d3eeSPaul Mackerras 	pgd_t *pgt;
130583a05510SPaul Mackerras 	struct kvm_nested_guest *nested;
13062fb47060SMike Rapoport 	pgd_t *pgdp;
13072fb47060SMike Rapoport 	p4d_t p4d, *p4dp;
13089a94d3eeSPaul Mackerras 	pud_t pud, *pudp;
13099a94d3eeSPaul Mackerras 	pmd_t pmd, *pmdp;
13109a94d3eeSPaul Mackerras 	pte_t *ptep;
13119a94d3eeSPaul Mackerras 	int shift;
13129a94d3eeSPaul Mackerras 	unsigned long pte;
13139a94d3eeSPaul Mackerras 
13149a94d3eeSPaul Mackerras 	kvm = p->kvm;
13159a94d3eeSPaul Mackerras 	if (!kvm_is_radix(kvm))
13169a94d3eeSPaul Mackerras 		return 0;
13179a94d3eeSPaul Mackerras 
13189a94d3eeSPaul Mackerras 	ret = mutex_lock_interruptible(&p->mutex);
13199a94d3eeSPaul Mackerras 	if (ret)
13209a94d3eeSPaul Mackerras 		return ret;
13219a94d3eeSPaul Mackerras 
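	/* First return any output left over from the previous read */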
13229a94d3eeSPaul Mackerras 	if (p->chars_left) {
13239a94d3eeSPaul Mackerras 		n = p->chars_left;
13249a94d3eeSPaul Mackerras 		if (n > len)
13259a94d3eeSPaul Mackerras 			n = len;
13269a94d3eeSPaul Mackerras 		r = copy_to_user(buf, p->buf + p->buf_index, n);
13279a94d3eeSPaul Mackerras 		n -= r;
13289a94d3eeSPaul Mackerras 		p->chars_left -= n;
13299a94d3eeSPaul Mackerras 		p->buf_index += n;
13309a94d3eeSPaul Mackerras 		buf += n;
13319a94d3eeSPaul Mackerras 		len -= n;
13329a94d3eeSPaul Mackerras 		ret = n;
13339a94d3eeSPaul Mackerras 		if (r) {
13349a94d3eeSPaul Mackerras 			if (!n)
13359a94d3eeSPaul Mackerras 				ret = -EFAULT;
13369a94d3eeSPaul Mackerras 			goto out;
13379a94d3eeSPaul Mackerras 		}
13389a94d3eeSPaul Mackerras 	}
13399a94d3eeSPaul Mackerras 
13409a94d3eeSPaul Mackerras 	gpa = p->gpa;
134183a05510SPaul Mackerras 	nested = NULL;
134283a05510SPaul Mackerras 	pgt = NULL;
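	/*
	 * Dump this guest's own partition-scoped table (lpid 0) first, then
	 * the shadow tables of any nested guests, one leaf PTE per line.
	 */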
134383a05510SPaul Mackerras 	while (len != 0 && p->lpid >= 0) {
134483a05510SPaul Mackerras 		if (gpa >= RADIX_PGTABLE_RANGE) {
134583a05510SPaul Mackerras 			gpa = 0;
134683a05510SPaul Mackerras 			pgt = NULL;
134783a05510SPaul Mackerras 			if (nested) {
134883a05510SPaul Mackerras 				kvmhv_put_nested(nested);
134983a05510SPaul Mackerras 				nested = NULL;
135083a05510SPaul Mackerras 			}
135183a05510SPaul Mackerras 			p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
135283a05510SPaul Mackerras 			p->hdr = 0;
135383a05510SPaul Mackerras 			if (p->lpid < 0)
135483a05510SPaul Mackerras 				break;
135583a05510SPaul Mackerras 		}
135683a05510SPaul Mackerras 		if (!pgt) {
135783a05510SPaul Mackerras 			if (p->lpid == 0) {
13589a94d3eeSPaul Mackerras 				pgt = kvm->arch.pgtable;
135983a05510SPaul Mackerras 			} else {
136083a05510SPaul Mackerras 				nested = kvmhv_get_nested(kvm, p->lpid, false);
136183a05510SPaul Mackerras 				if (!nested) {
136283a05510SPaul Mackerras 					gpa = RADIX_PGTABLE_RANGE;
136383a05510SPaul Mackerras 					continue;
136483a05510SPaul Mackerras 				}
136583a05510SPaul Mackerras 				pgt = nested->shadow_pgtable;
136683a05510SPaul Mackerras 			}
136783a05510SPaul Mackerras 		}
136883a05510SPaul Mackerras 		n = 0;
13699a94d3eeSPaul Mackerras 		if (!p->hdr) {
137083a05510SPaul Mackerras 			if (p->lpid > 0)
13719a94d3eeSPaul Mackerras 				n = scnprintf(p->buf, sizeof(p->buf),
137283a05510SPaul Mackerras 					      "\nNested LPID %d: ", p->lpid);
137383a05510SPaul Mackerras 			n += scnprintf(p->buf + n, sizeof(p->buf) - n,
13749a94d3eeSPaul Mackerras 				      "pgdir: %lx\n", (unsigned long)pgt);
13759a94d3eeSPaul Mackerras 			p->hdr = 1;
13769a94d3eeSPaul Mackerras 			goto copy;
13779a94d3eeSPaul Mackerras 		}
13789a94d3eeSPaul Mackerras 
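		/*
		 * Walk down the radix tree by hand; a non-present entry lets
		 * us skip forward by the whole range that level covers.
		 */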
13799a94d3eeSPaul Mackerras 		pgdp = pgt + pgd_index(gpa);
13802fb47060SMike Rapoport 		p4dp = p4d_offset(pgdp, gpa);
13812fb47060SMike Rapoport 		p4d = READ_ONCE(*p4dp);
13822fb47060SMike Rapoport 		if (!(p4d_val(p4d) & _PAGE_PRESENT)) {
13832fb47060SMike Rapoport 			gpa = (gpa & P4D_MASK) + P4D_SIZE;
13849a94d3eeSPaul Mackerras 			continue;
13859a94d3eeSPaul Mackerras 		}
13869a94d3eeSPaul Mackerras 
13872fb47060SMike Rapoport 		pudp = pud_offset(&p4d, gpa);
13889a94d3eeSPaul Mackerras 		pud = READ_ONCE(*pudp);
13899a94d3eeSPaul Mackerras 		if (!(pud_val(pud) & _PAGE_PRESENT)) {
13909a94d3eeSPaul Mackerras 			gpa = (gpa & PUD_MASK) + PUD_SIZE;
13919a94d3eeSPaul Mackerras 			continue;
13929a94d3eeSPaul Mackerras 		}
13939a94d3eeSPaul Mackerras 		if (pud_val(pud) & _PAGE_PTE) {
13949a94d3eeSPaul Mackerras 			pte = pud_val(pud);
13959a94d3eeSPaul Mackerras 			shift = PUD_SHIFT;
13969a94d3eeSPaul Mackerras 			goto leaf;
13979a94d3eeSPaul Mackerras 		}
13989a94d3eeSPaul Mackerras 
13999a94d3eeSPaul Mackerras 		pmdp = pmd_offset(&pud, gpa);
14009a94d3eeSPaul Mackerras 		pmd = READ_ONCE(*pmdp);
14019a94d3eeSPaul Mackerras 		if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
14029a94d3eeSPaul Mackerras 			gpa = (gpa & PMD_MASK) + PMD_SIZE;
14039a94d3eeSPaul Mackerras 			continue;
14049a94d3eeSPaul Mackerras 		}
14059a94d3eeSPaul Mackerras 		if (pmd_val(pmd) & _PAGE_PTE) {
14069a94d3eeSPaul Mackerras 			pte = pmd_val(pmd);
14079a94d3eeSPaul Mackerras 			shift = PMD_SHIFT;
14089a94d3eeSPaul Mackerras 			goto leaf;
14099a94d3eeSPaul Mackerras 		}
14109a94d3eeSPaul Mackerras 
14119a94d3eeSPaul Mackerras 		ptep = pte_offset_kernel(&pmd, gpa);
14129a94d3eeSPaul Mackerras 		pte = pte_val(READ_ONCE(*ptep));
14139a94d3eeSPaul Mackerras 		if (!(pte & _PAGE_PRESENT)) {
14149a94d3eeSPaul Mackerras 			gpa += PAGE_SIZE;
14159a94d3eeSPaul Mackerras 			continue;
14169a94d3eeSPaul Mackerras 		}
14179a94d3eeSPaul Mackerras 		shift = PAGE_SHIFT;
14189a94d3eeSPaul Mackerras 	leaf:
14199a94d3eeSPaul Mackerras 		n = scnprintf(p->buf, sizeof(p->buf),
14209a94d3eeSPaul Mackerras 			      " %lx: %lx %d\n", gpa, pte, shift);
14219a94d3eeSPaul Mackerras 		gpa += 1ul << shift;
14229a94d3eeSPaul Mackerras 	copy:
14239a94d3eeSPaul Mackerras 		p->chars_left = n;
14249a94d3eeSPaul Mackerras 		if (n > len)
14259a94d3eeSPaul Mackerras 			n = len;
14269a94d3eeSPaul Mackerras 		r = copy_to_user(buf, p->buf, n);
14279a94d3eeSPaul Mackerras 		n -= r;
14289a94d3eeSPaul Mackerras 		p->chars_left -= n;
14299a94d3eeSPaul Mackerras 		p->buf_index = n;
14309a94d3eeSPaul Mackerras 		buf += n;
14319a94d3eeSPaul Mackerras 		len -= n;
14329a94d3eeSPaul Mackerras 		ret += n;
14339a94d3eeSPaul Mackerras 		if (r) {
14349a94d3eeSPaul Mackerras 			if (!ret)
14359a94d3eeSPaul Mackerras 				ret = -EFAULT;
14369a94d3eeSPaul Mackerras 			break;
14379a94d3eeSPaul Mackerras 		}
14389a94d3eeSPaul Mackerras 	}
14399a94d3eeSPaul Mackerras 	p->gpa = gpa;
144083a05510SPaul Mackerras 	if (nested)
144183a05510SPaul Mackerras 		kvmhv_put_nested(nested);
14429a94d3eeSPaul Mackerras 
14439a94d3eeSPaul Mackerras  out:
14449a94d3eeSPaul Mackerras 	mutex_unlock(&p->mutex);
14459a94d3eeSPaul Mackerras 	return ret;
14469a94d3eeSPaul Mackerras }
14479a94d3eeSPaul Mackerras 
14489a94d3eeSPaul Mackerras static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
14499a94d3eeSPaul Mackerras 			   size_t len, loff_t *ppos)
14509a94d3eeSPaul Mackerras {
14519a94d3eeSPaul Mackerras 	return -EACCES;
14529a94d3eeSPaul Mackerras }
14539a94d3eeSPaul Mackerras 
14549a94d3eeSPaul Mackerras static const struct file_operations debugfs_radix_fops = {
14559a94d3eeSPaul Mackerras 	.owner	 = THIS_MODULE,
14569a94d3eeSPaul Mackerras 	.open	 = debugfs_radix_open,
14579a94d3eeSPaul Mackerras 	.release = debugfs_radix_release,
14589a94d3eeSPaul Mackerras 	.read	 = debugfs_radix_read,
14599a94d3eeSPaul Mackerras 	.write	 = debugfs_radix_write,
14609a94d3eeSPaul Mackerras 	.llseek	 = generic_file_llseek,
14619a94d3eeSPaul Mackerras };
14629a94d3eeSPaul Mackerras 
14639a94d3eeSPaul Mackerras void kvmhv_radix_debugfs_init(struct kvm *kvm)
14649a94d3eeSPaul Mackerras {
1465faf01aefSAlexey Kardashevskiy 	debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
14669a94d3eeSPaul Mackerras 			    &debugfs_radix_fops);
14679a94d3eeSPaul Mackerras }
14689a94d3eeSPaul Mackerras 
14695a319350SPaul Mackerras int kvmppc_radix_init(void)
14705a319350SPaul Mackerras {
147121828c99SAneesh Kumar K.V 	unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
14725a319350SPaul Mackerras 
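	/* Size-aligned caches for the PTE and PMD levels of guest radix trees */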
14735a319350SPaul Mackerras 	kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
14745a319350SPaul Mackerras 	if (!kvm_pte_cache)
14755a319350SPaul Mackerras 		return -ENOMEM;
147621828c99SAneesh Kumar K.V 
147721828c99SAneesh Kumar K.V 	size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
147821828c99SAneesh Kumar K.V 
147921828c99SAneesh Kumar K.V 	kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor);
148021828c99SAneesh Kumar K.V 	if (!kvm_pmd_cache) {
148121828c99SAneesh Kumar K.V 		kmem_cache_destroy(kvm_pte_cache);
148221828c99SAneesh Kumar K.V 		return -ENOMEM;
148321828c99SAneesh Kumar K.V 	}
148421828c99SAneesh Kumar K.V 
14855a319350SPaul Mackerras 	return 0;
14865a319350SPaul Mackerras }
14875a319350SPaul Mackerras 
14885a319350SPaul Mackerras void kvmppc_radix_exit(void)
14895a319350SPaul Mackerras {
14905a319350SPaul Mackerras 	kmem_cache_destroy(kvm_pte_cache);
149121828c99SAneesh Kumar K.V 	kmem_cache_destroy(kvm_pmd_cache);
14925a319350SPaul Mackerras }
1493