// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#ifndef __ARM64_KVM_PGTABLE_H__
#define __ARM64_KVM_PGTABLE_H__

#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

static inline u64 kvm_get_parange(u64 mmfr0)
{
	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_PARANGE_SHIFT);
	if (parange > ID_AA64MMFR0_PARANGE_MAX)
		parange = ID_AA64MMFR0_PARANGE_MAX;

	return parange;
}

typedef u64 kvm_pte_t;

/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:		Allocate a single zeroed memory page.
 *				The @arg parameter can be used by the walker
 *				to pass a memcache. The initial refcount of
 *				the page is 1.
 * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
 *				The @size parameter is in bytes, and is rounded
 *				up to the next page boundary. The resulting
 *				allocation is physically contiguous.
 * @free_pages_exact:		Free an exact number of memory pages previously
 *				allocated by zalloc_pages_exact.
 * @get_page:			Increment the refcount on a page.
 * @put_page:			Decrement the refcount on a page. When the
 *				refcount reaches 0 the page is automatically
 *				freed.
 * @page_count:			Return the refcount of a page.
 * @phys_to_virt:		Convert a physical address into a virtual
 *				address mapped in the current context.
 * @virt_to_phys:		Convert a virtual address mapped in the current
 *				context into a physical address.
 * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
 *				for the specified memory address range.
 * @icache_inval_pou:		Invalidate the instruction cache to the PoU
 *				for the specified memory address range.
 */
struct kvm_pgtable_mm_ops {
	void*		(*zalloc_page)(void *arg);
	void*		(*zalloc_pages_exact)(size_t size);
	void		(*free_pages_exact)(void *addr, size_t size);
	void		(*get_page)(void *addr);
	void		(*put_page)(void *addr);
	int		(*page_count)(void *addr);
	void*		(*phys_to_virt)(phys_addr_t phys);
	phys_addr_t	(*virt_to_phys)(void *addr);
	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
	void		(*icache_inval_pou)(void *addr, size_t size);
};

/**
 * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
 * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
 *				ARM64_HAS_STAGE2_FWB.
 * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
 */
enum kvm_pgtable_stage2_flags {
	KVM_PGTABLE_S2_NOFWB			= BIT(0),
	KVM_PGTABLE_S2_IDMAP			= BIT(1),
};

/**
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 * @flags:		Stage-2 page-table flags.
 */
struct kvm_pgtable {
	u32					ia_bits;
	u32					start_level;
	kvm_pte_t				*pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
};
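/*
 * A minimal sketch of one possible mm_ops implementation for a host-owned
 * table living in the kernel linear map. This is illustrative only, not the
 * in-tree callback set: the example_* wrappers are hypothetical and exist to
 * adapt the kernel's page APIs to the void-pointer signatures above. The
 * exact-allocation and cache maintenance callbacks are omitted; the latter
 * are only needed for stage-2 tables when FWB is not in use.
 *
 *	static void *example_zalloc_page(void *arg)
 *	{
 *		return (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 *	}
 *
 *	static void example_get_page(void *addr)
 *	{
 *		get_page(virt_to_page(addr));
 *	}
 *
 *	static void example_put_page(void *addr)
 *	{
 *		put_page(virt_to_page(addr));
 *	}
 *
 *	static int example_page_count(void *addr)
 *	{
 *		return page_count(virt_to_page(addr));
 *	}
 *
 *	static void *example_phys_to_virt(phys_addr_t phys)
 *	{
 *		return __va(phys);
 *	}
 *
 *	static phys_addr_t example_virt_to_phys(void *addr)
 *	{
 *		return __pa(addr);
 *	}
 *
 *	static struct kvm_pgtable_mm_ops example_mm_ops = {
 *		.zalloc_page	= example_zalloc_page,
 *		.get_page	= example_get_page,
 *		.put_page	= example_put_page,
 *		.page_count	= example_page_count,
 *		.phys_to_virt	= example_phys_to_virt,
 *		.virt_to_phys	= example_virt_to_phys,
 *	};
 */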
/**
 * enum kvm_pgtable_prot - Page-table permissions and attributes.
 * @KVM_PGTABLE_PROT_X:		Execute permission.
 * @KVM_PGTABLE_PROT_W:		Write permission.
 * @KVM_PGTABLE_PROT_R:		Read permission.
 * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
 */
enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_X			= BIT(0),
	KVM_PGTABLE_PROT_W			= BIT(1),
	KVM_PGTABLE_PROT_R			= BIT(2),

	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
};

#define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)

/**
 * struct kvm_mem_range - Range of Intermediate Physical Addresses.
 * @start:	Start of the range.
 * @end:	End of the range.
 */
struct kvm_mem_range {
	u64 start;
	u64 end;
};

/**
 * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
 * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
 *					entries.
 * @KVM_PGTABLE_WALK_TABLE_PRE:		Visit table entries before their
 *					children.
 * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
 *					children.
 */
enum kvm_pgtable_walk_flags {
	KVM_PGTABLE_WALK_LEAF			= BIT(0),
	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
};

typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
					kvm_pte_t *ptep,
					enum kvm_pgtable_walk_flags flag,
					void * const arg);

/**
 * struct kvm_pgtable_walker - Hook into a page-table walk.
 * @cb:		Callback function to invoke during the walk.
 * @arg:	Argument passed to the callback function.
 * @flags:	Bitwise-OR of flags to identify the entry types on which to
 *		invoke the callback function.
 */
struct kvm_pgtable_walker {
	const kvm_pgtable_visitor_fn_t		cb;
	void * const				arg;
	const enum kvm_pgtable_walk_flags	flags;
};

/**
 * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @va_bits:	Maximum virtual address bits.
 * @mm_ops:	Memory management callbacks.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops);

/**
 * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 * @addr:	Virtual address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable. Attempts to install a new mapping
 * for a virtual address that is already mapped will be rejected with an
 * error and a WARN().
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot);
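/*
 * A short usage sketch for the hypervisor stage-1 API above. hyp_va_bits and
 * the page-aligned buffer buf are assumed to exist, and example_mm_ops is the
 * hypothetical callback set sketched earlier in this header. The mapping is
 * placed at the kernel alias of buf purely for brevity:
 *
 *	struct kvm_pgtable hyp_pgt;
 *	int ret;
 *
 *	ret = kvm_pgtable_hyp_init(&hyp_pgt, hyp_va_bits, &example_mm_ops);
 *	if (ret)
 *		return ret;
 *
 *	ret = kvm_pgtable_hyp_map(&hyp_pgt, (u64)buf, PAGE_SIZE,
 *				  __pa(buf), PAGE_HYP_RO);
 *	if (ret)
 *		kvm_pgtable_hyp_destroy(&hyp_pgt);
 */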
/**
 * kvm_get_vtcr() - Helper to construct VTCR_EL2.
 * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
 * @mmfr1:	Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
 * @phys_shift:	Value to set in VTCR_EL2.T0SZ.
 *
 * The VTCR value is common across all the physical CPUs on the system.
 * We use system-wide sanitised values to fill in the different fields,
 * except for Hardware Management of Access Flags: the HA flag is set
 * unconditionally on all CPUs, as it is safe to run with or without
 * the feature and the bit is RES0 on CPUs that don't support it.
 *
 * Return: VTCR_EL2 value
 */
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);

/**
 * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @arch:	Arch-specific KVM structure representing the guest virtual
 *		machine.
 * @mm_ops:	Memory management callbacks.
 * @flags:	Stage-2 configuration flags.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
				  struct kvm_pgtable_mm_ops *mm_ops,
				  enum kvm_pgtable_stage2_flags flags);

#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)

/**
 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Note that the update of a valid leaf PTE in this function will be aborted
 * if it would either recreate the exact same mapping or only change the
 * access permissions. Instead, the vCPU will exit from the guest once more
 * if still needed, and the update will then go through the path of relaxing
 * permissions.
 *
 * Note that this function will both coalesce existing table entries and split
 * existing block mappings, relying on page-faults to fault back areas outside
 * of the new mapping lazily.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc);
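/*
 * A sketch of guest stage-2 setup and mapping. kvm, ipa, pa and a topped-up
 * memcache are assumed to be in scope, and example_mm_ops is the hypothetical
 * callback set sketched earlier in this header:
 *
 *	struct kvm_pgtable pgt;
 *	int ret;
 *
 *	ret = kvm_pgtable_stage2_init(&pgt, &kvm->arch, &example_mm_ops);
 *	if (ret)
 *		return ret;
 *
 * Map a writable, cacheable page at IPA ipa, backed by PA pa:
 *
 *	ret = kvm_pgtable_stage2_map(&pgt, ipa, PAGE_SIZE, pa,
 *				     KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
 *				     memcache);
 */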
/**
 * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
 *				    track ownership.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Base intermediate physical address to annotate.
 * @size:	Size of the annotated range.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @owner_id:	Unique identifier for the owner of the page.
 *
 * By default, all page-tables are owned by identifier 0. This function can be
 * used to mark portions of the IPA space as owned by other entities. When a
 * stage-2 is used with identity mappings, these annotations allow the
 * page-table data structure to be used as a simple rmap.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
				 void *mc, u8 owner_id);

/**
 * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
 * FWB is not supported by the CPU.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
 *				    without TLB invalidation.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to write-protect.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
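/*
 * Write-protection is typically used to implement dirty logging, with the
 * caller invalidating the TLB afterwards as documented above. A sketch, where
 * stage2_flush_vmid_tlbs() is a hypothetical stand-in for whatever TLB
 * invalidation mechanism the calling context provides:
 *
 *	ret = kvm_pgtable_stage2_wrprotect(&pgt, base_ipa, size);
 *	if (!ret)
 *		stage2_flush_vmid_tlbs(pgt.mmu);
 */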
/**
 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * set the access flag in that entry.
 *
 * Return: The old page-table entry prior to setting the flag, 0 on failure.
 */
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);

/**
 * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * clear the access flag in that entry.
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: The old page-table entry prior to clearing the flag, 0 on failure.
 */
kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);

/**
 * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
 *				      page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 * @prot:	Additional permissions to grant for the mapping.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * relax the permissions in that entry according to the read, write and
 * execute permissions specified by @prot. No permissions are removed, and
 * TLB invalidation is performed after updating the entry.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot);

/**
 * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
 *				   access flag set.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 *
 * The offset of @addr within a page is ignored.
 *
 * Return: True if the page-table entry has the access flag set, false otherwise.
 */
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);

/**
 * kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point of
 *				Coherency for guest stage-2 address range.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to flush.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
 * @addr:	Input address for the start of the walk.
 * @size:	Size of the range to walk.
 * @walker:	Walker callback description.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address range specified, visiting entries according to the walker flags.
 * Invalid entries are treated as leaf entries. Leaf entries are reloaded
 * after invoking the walker callback, allowing the walker to descend into
 * a newly installed table.
 *
 * Returning a negative error code from the walker callback function will
 * terminate the walk immediately with the same error code.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker);

/**
 * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
 *				     Addresses with compatible permission
 *				     attributes.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Address that must be covered by the range.
 * @prot:	Protection attributes that the range must be compatible with.
 * @range:	Range structure used to limit the search space at call time and
 *		that will hold the result.
 *
 * The offset of @addr within a page is ignored. An IPA is compatible with @prot
 * iff its corresponding stage-2 page-table entry has default ownership and, if
 * valid, is mapped with protection attributes identical to @prot.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
				  enum kvm_pgtable_prot prot,
				  struct kvm_mem_range *range);
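/*
 * A sketch of a walker built on kvm_pgtable_walk(): it counts valid leaf
 * entries in a range, relying on the architectural fact that bit 0 of a
 * VMSAv8-64 descriptor is the valid bit. The callback and helper are
 * illustrative only, and the walk's return code is ignored for brevity:
 *
 *	static int count_valid_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 *				  enum kvm_pgtable_walk_flags flag,
 *				  void * const arg)
 *	{
 *		u64 *count = arg;
 *
 *		if (*ptep & BIT(0))
 *			(*count)++;
 *
 *		return 0;
 *	}
 *
 *	static u64 count_valid_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
 *	{
 *		u64 count = 0;
 *		struct kvm_pgtable_walker walker = {
 *			.cb	= count_valid_cb,
 *			.arg	= &count,
 *			.flags	= KVM_PGTABLE_WALK_LEAF,
 *		};
 *
 *		kvm_pgtable_walk(pgt, addr, size, &walker);
 *
 *		return count;
 *	}
 *
 * And a sketch for kvm_pgtable_stage2_find_range(), narrowing a search window
 * initialised to the whole IPA space down to a range around ipa that is
 * compatible with read/write permissions:
 *
 *	struct kvm_mem_range range = {
 *		.start	= 0,
 *		.end	= BIT(pgt->ia_bits),
 *	};
 *
 *	ret = kvm_pgtable_stage2_find_range(pgt, ipa,
 *					    KVM_PGTABLE_PROT_R |
 *					    KVM_PGTABLE_PROT_W,
 *					    &range);
 */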
#endif	/* __ARM64_KVM_PGTABLE_H__ */