1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2020 Google LLC
4  * Author: Will Deacon <will@kernel.org>
5  */
6 
7 #ifndef __ARM64_KVM_PGTABLE_H__
8 #define __ARM64_KVM_PGTABLE_H__
9 
10 #include <linux/bits.h>
11 #include <linux/kvm_host.h>
12 #include <linux/types.h>
13 
14 typedef u64 kvm_pte_t;
15 
16 /**
17  * struct kvm_pgtable - KVM page-table (hypervisor stage-1 or guest stage-2).
18  * @ia_bits:		Maximum input address size, in bits.
19  * @start_level:	Level at which the page-table walk starts.
20  * @pgd:		Pointer to the first top-level entry of the page-table.
21  * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
22  */
23 struct kvm_pgtable {
24 	u32					ia_bits;
25 	u32					start_level;
26 	kvm_pte_t				*pgd;
27 
28 	/* Stage-2 only */
29 	struct kvm_s2_mmu			*mmu;
30 };
31 
32 /**
33  * enum kvm_pgtable_prot - Page-table permissions and attributes.
34  * @KVM_PGTABLE_PROT_X:		Execute permission.
35  * @KVM_PGTABLE_PROT_W:		Write permission.
36  * @KVM_PGTABLE_PROT_R:		Read permission.
37  * @KVM_PGTABLE_PROT_DEVICE:	Device attributes. If not requested, the
 *				mapping is normal, cacheable.
38  */
39 enum kvm_pgtable_prot {
40 	KVM_PGTABLE_PROT_X			= BIT(0),
41 	KVM_PGTABLE_PROT_W			= BIT(1),
42 	KVM_PGTABLE_PROT_R			= BIT(2),
43 
44 	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
45 };
46 
/*
 * Protection combinations built from enum kvm_pgtable_prot flags:
 * read/write, read/execute, read-only and read/write with device attributes.
 * NOTE(review): presumably intended as the @prot argument of
 * kvm_pgtable_hyp_map() -- confirm against the callers.
 */
47 #define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
48 #define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
49 #define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
50 #define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
51 
52 /**
53  * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
54  * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
55  *					entries.
56  * @KVM_PGTABLE_WALK_TABLE_PRE:		Visit table entries before their
57  *					children.
58  * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
59  *					children.
 *
 * These flags are bitwise-ORed in the @flags member of
 * struct kvm_pgtable_walker to select the entry types on which the walker
 * callback is invoked.
60  */
61 enum kvm_pgtable_walk_flags {
62 	KVM_PGTABLE_WALK_LEAF			= BIT(0),
63 	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
64 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
65 };
66 
/*
 * Type of the callback stored in the @cb member of struct kvm_pgtable_walker
 * and invoked during kvm_pgtable_walk(). @arg is the walker's @arg member.
 * Returning a negative error code terminates the walk immediately with that
 * error code.
 * NOTE(review): @addr/@end, @level, @ptep and @flag presumably describe the
 * address range, table level, entry pointer and visit type of the entry being
 * visited -- confirm against the walker implementation.
 */
67 typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
68 					kvm_pte_t *ptep,
69 					enum kvm_pgtable_walk_flags flag,
70 					void * const arg);
71 
72 /**
73  * struct kvm_pgtable_walker - Hook into a page-table walk.
74  * @cb:		Callback function to invoke during the walk. A negative
 *		return value terminates the walk with that error code.
75  * @arg:	Argument passed to the callback function.
76  * @flags:	Bitwise-OR of enum kvm_pgtable_walk_flags values identifying
77  *		the entry types on which to invoke the callback function.
78  */
79 struct kvm_pgtable_walker {
80 	const kvm_pgtable_visitor_fn_t		cb;
81 	void * const				arg;
82 	const enum kvm_pgtable_walk_flags	flags;
83 };
84 
85 /**
86  * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
87  * @pgt:	Uninitialised page-table structure to initialise.
88  * @va_bits:	Maximum virtual address size, in bits.
89  *
 * A page-table initialised by this function is torn down with
 * kvm_pgtable_hyp_destroy().
 *
90  * Return: 0 on success, negative error code on failure.
91  */
92 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
93 
94 /**
95  * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
96  * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
97  *
98  * The caller must ensure that the page-table is unreachable by any hardware
99  * walkers before calling this function, as no TLB invalidation is performed.
100  */
101 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
102 
103 /**
104  * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
105  * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
106  * @addr:	Virtual address at which to place the mapping.
107  * @size:	Size of the mapping.
108  * @phys:	Physical address of the memory to map.
109  * @prot:	Permissions and attributes for the mapping.
110  *
111  * The offset of @addr within a page is ignored, @size is rounded up to
112  * the next page boundary and @phys is rounded down to the previous page
113  * boundary.
114  *
115  * If device attributes are not explicitly requested in @prot, then the
116  * mapping will be normal, cacheable. Attempts to install a new mapping
117  * for a virtual address that is already mapped will be rejected with an
118  * error and a WARN().
119  *
120  * Return: 0 on success, negative error code on failure.
121  */
122 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
123 			enum kvm_pgtable_prot prot);
124 
125 /**
126  * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
127  * @pgt:	Uninitialised page-table structure to initialise.
128  * @kvm:	KVM structure representing the guest virtual machine.
129  *
 * A page-table initialised by this function is torn down with
 * kvm_pgtable_stage2_destroy().
 *
130  * Return: 0 on success, negative error code on failure.
131  */
132 int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
133 
134 /**
135  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
136  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
137  *
138  * The caller must ensure that the page-table is unreachable by any hardware
139  * walkers before calling this function, as no TLB invalidation is performed.
140  */
141 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
142 
143 /**
144  * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
145  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
146  * @addr:	Intermediate physical address at which to place the mapping.
147  * @size:	Size of the mapping.
148  * @phys:	Physical address of the memory to map.
149  * @prot:	Permissions and attributes for the mapping.
150  * @mc:		Cache of pre-allocated GFP_PGTABLE_USER memory from which to
151  *		allocate page-table pages.
152  *
153  * The offset of @addr within a page is ignored, @size is rounded up to
154  * the next page boundary and @phys is rounded down to the previous page
155  * boundary.
156  *
157  * If device attributes are not explicitly requested in @prot, then the
158  * mapping will be normal, cacheable.
159  *
160  * Note that updating a valid leaf PTE in this function is aborted if the
161  * update would recreate the exact same mapping or only change the access
162  * permissions. Instead, the vCPU will exit from the guest once more if still
163  * needed and then go through the path of relaxing permissions.
164  *
165  * Note that this function will both coalesce existing table entries and split
166  * existing block mappings, relying on page-faults to fault back areas outside
167  * of the new mapping lazily.
168  *
169  * Return: 0 on success, negative error code on failure.
170  */
171 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
172 			   u64 phys, enum kvm_pgtable_prot prot,
173 			   struct kvm_mmu_memory_cache *mc);
174 
175 /**
176  * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
177  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
178  * @addr:	Intermediate physical address from which to remove the mapping.
179  * @size:	Size of the mapping.
180  *
181  * The offset of @addr within a page is ignored and @size is rounded up to
182  * the next page boundary.
183  *
184  * TLB invalidation is performed for each page-table entry cleared during the
185  * unmapping operation and the reference count for the page-table page
186  * containing the cleared entry is decremented, with unreferenced pages being
187  * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
188  * FWB is not supported by the CPU.
189  *
190  * Return: 0 on success, negative error code on failure.
191  */
192 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
193 
194 /**
195  * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
196  *                                  without TLB invalidation.
197  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
198  * @addr:	Intermediate physical address from which to write-protect.
199  * @size:	Size of the range.
200  *
201  * The offset of @addr within a page is ignored and @size is rounded up to
202  * the next page boundary.
203  *
204  * Note that it is the caller's responsibility to invalidate the TLB after
205  * calling this function to ensure that the updated permissions are visible
206  * to the CPUs.
207  *
208  * Return: 0 on success, negative error code on failure.
209  */
210 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
211 
212 /**
213  * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
214  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
215  * @addr:	Intermediate physical address to identify the page-table entry.
216  *
217  * The offset of @addr within a page is ignored.
218  *
219  * If there is a valid leaf page-table entry used to translate @addr, then
220  * set the access flag in that entry.
221  *
222  * Return: The old page-table entry prior to setting the flag, 0 on failure.
223  */
224 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
225 
226 /**
227  * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
228  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
229  * @addr:	Intermediate physical address to identify the page-table entry.
230  *
231  * The offset of @addr within a page is ignored.
232  *
233  * If there is a valid leaf page-table entry used to translate @addr, then
234  * clear the access flag in that entry.
235  *
236  * Note that it is the caller's responsibility to invalidate the TLB after
237  * calling this function to ensure that the cleared access flag is visible
238  * to the CPUs.
239  *
240  * Return: The old page-table entry prior to clearing the flag, 0 on failure.
241  */
242 kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
243 
244 /**
245  * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
246  *				      page-table entry.
247  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
248  * @addr:	Intermediate physical address to identify the page-table entry.
249  * @prot:	Additional permissions to grant for the mapping.
250  *
251  * The offset of @addr within a page is ignored.
252  *
253  * If there is a valid leaf page-table entry used to translate @addr, then
254  * relax the permissions in that entry according to the read, write and
255  * execute permissions specified by @prot. No permissions are removed, and
256  * TLB invalidation is performed after updating the entry.
257  *
258  * Return: 0 on success, negative error code on failure.
259  */
260 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
261 				   enum kvm_pgtable_prot prot);
262 
263 /**
264  * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
265  *				   access flag set.
266  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
267  * @addr:	Intermediate physical address to identify the page-table entry.
268  *
269  * The offset of @addr within a page is ignored.
270  *
271  * Return: True if the page-table entry has the access flag set, false
 *	   otherwise.
272  */
273 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
274 
275 /**
276  * kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point of
277  *				Coherency for guest stage-2 address
278  *				range.
279  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
280  * @addr:	Intermediate physical address from which to flush.
281  * @size:	Size of the range.
282  *
283  * The offset of @addr within a page is ignored and @size is rounded up to
284  * the next page boundary.
285  *
286  * Return: 0 on success, negative error code on failure.
287  */
288 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
289 
290 /**
291  * kvm_pgtable_walk() - Walk a page-table.
292  * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
293  * @addr:	Input address for the start of the walk.
294  * @size:	Size of the range to walk.
295  * @walker:	Walker callback description.
296  *
297  * The offset of @addr within a page is ignored and @size is rounded up to
298  * the next page boundary.
299  *
300  * The walker will walk the page-table entries corresponding to the input
301  * address range specified, visiting entries according to the walker flags.
302  * Invalid entries are treated as leaf entries. Leaf entries are reloaded
303  * after invoking the walker callback, allowing the walker to descend into
304  * a newly installed table.
305  *
306  * Returning a negative error code from the walker callback function will
307  * terminate the walk immediately with the same error code.
308  *
309  * Return: 0 on success, negative error code on failure.
310  */
311 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
312 		     struct kvm_pgtable_walker *walker);
313 
314 #endif	/* __ARM64_KVM_PGTABLE_H__ */
315