// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Google LLC
 * Author: Fuad Tabba <tabba@google.com>
 */

#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>

/*
 * Set trap register values based on features in ID_AA64PFR0.
 */
static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
	u64 hcr_set = HCR_RW;
	u64 hcr_clear = 0;
	u64 cptr_set = 0;

	/* Protected KVM does not support AArch32 guests. */
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);

	/*
	 * Linux guests assume support for floating-point and Advanced SIMD. Do
	 * not change the trapping behavior for these from the KVM default.
	 */
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
				PVM_ID_AA64PFR0_ALLOW));
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
				PVM_ID_AA64PFR0_ALLOW));

	/* Trap RAS unless all current versions are supported */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
	    ID_AA64PFR0_EL1_RAS_V1P1) {
		hcr_set |= HCR_TERR | HCR_TEA;
		hcr_clear |= HCR_FIEN;
	}

	/* Trap AMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
		hcr_clear |= HCR_AMVOFFEN;
		cptr_set |= CPTR_EL2_TAM;
	}

	/* Trap SVE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
		cptr_set |= CPTR_EL2_TZ;

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
}

/*
 * Set trap register values based on features in ID_AA64PFR1.
 */
static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
	u64 hcr_set = 0;
	u64 hcr_clear = 0;

	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
		hcr_set |= HCR_TID5;
		hcr_clear |= HCR_DCT | HCR_ATA;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
}

/*
 * Set trap register values based on features in ID_AA64DFR0.
 */
static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
	u64 mdcr_set = 0;
	u64 mdcr_clear = 0;
	u64 cptr_set = 0;

	/* Trap/constrain PMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
			      MDCR_EL2_HPMN_MASK;
	}

	/* Trap Debug */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;

	/* Trap OS Double Lock */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
		mdcr_set |= MDCR_EL2_TDOSA;

	/* Trap SPE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPMS;
		mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
	}

	/* Trap Trace Filter */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
		mdcr_set |= MDCR_EL2_TTRF;

	/* Trap Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
		cptr_set |= CPTR_EL2_TTA;

	vcpu->arch.mdcr_el2 |= mdcr_set;
	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR0.
 */
static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
	u64 mdcr_set = 0;

	/* Trap Debug Communications Channel registers */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
		mdcr_set |= MDCR_EL2_TDCC;

	vcpu->arch.mdcr_el2 |= mdcr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR1.
 */
static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
	u64 hcr_set = 0;

	/* Trap LOR */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
		hcr_set |= HCR_TLOR;

	vcpu->arch.hcr_el2 |= hcr_set;
}

/*
 * Set baseline trap register values.
 */
static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
{
	const u64 hcr_trap_feat_regs = HCR_TID3;
	const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;

	/*
	 * Always trap:
	 * - Feature id registers: to control features exposed to guests
	 * - Implementation-defined features
	 */
	vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;

	/* Clear res0 and set res1 bits to trap potential new features. */
	vcpu->arch.hcr_el2 &= ~(HCR_RES0);
	vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
	vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
	vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
}

/*
 * Initialize trap register values for protected VMs.
 */
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
	pvm_init_trap_regs(vcpu);
	pvm_init_traps_aa64pfr0(vcpu);
	pvm_init_traps_aa64pfr1(vcpu);
	pvm_init_traps_aa64dfr0(vcpu);
	pvm_init_traps_aa64mmfr0(vcpu);
	pvm_init_traps_aa64mmfr1(vcpu);
}

/*
 * Start the VM table handle at the offset defined instead of at 0.
 * Mainly for sanity checking and debugging.
 */
#define HANDLE_OFFSET 0x1000

static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
	return handle - HANDLE_OFFSET;
}

static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
{
	return idx + HANDLE_OFFSET;
}

/*
 * Spinlock for protecting state related to the VM table. Protects writes
 * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
 * 'last_hyp_vcpu_lookup'.
 */
static DEFINE_HYP_SPINLOCK(vm_table_lock);

/*
 * The table of VM entries for protected VMs in hyp.
 * Allocated at hyp initialization and setup.
 */
static struct pkvm_hyp_vm **vm_table;

void pkvm_hyp_vm_table_init(void *tbl)
{
	WARN_ON(vm_table);
	vm_table = tbl;
}

/*
 * Return the hyp vm structure corresponding to the handle.
 */
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
{
	unsigned int idx = vm_handle_to_idx(handle);

	if (unlikely(idx >= KVM_MAX_PVMS))
		return NULL;

	return vm_table[idx];
}

static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
	if (host_vcpu)
		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}

static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
{
	int i;

	for (i = 0; i < nr_vcpus; i++)
		unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
}

static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
			     unsigned int nr_vcpus)
{
	hyp_vm->host_kvm = host_kvm;
	hyp_vm->kvm.created_vcpus = nr_vcpus;
	hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
}

static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
			      struct pkvm_hyp_vm *hyp_vm,
			      struct kvm_vcpu *host_vcpu,
			      unsigned int vcpu_idx)
{
	int ret = 0;

	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
		return -EBUSY;

	if (host_vcpu->vcpu_idx != vcpu_idx) {
		ret = -EINVAL;
		goto done;
	}

	hyp_vcpu->host_vcpu = host_vcpu;

	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
	hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;

	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
	return ret;
}

static int find_free_vm_table_entry(struct kvm *host_kvm)
{
	int i;

	for (i = 0; i < KVM_MAX_PVMS; ++i) {
		if (!vm_table[i])
			return i;
	}

	return -ENOMEM;
}

/*
 * Allocate a VM table entry and insert a pointer to the new vm.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
					   struct pkvm_hyp_vm *hyp_vm)
{
	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
	int idx;

	hyp_assert_lock_held(&vm_table_lock);

	/*
	 * Initializing protected state might have failed, yet a malicious
	 * host could trigger this function. Thus, ensure that 'vm_table'
	 * exists.
	 */
	if (unlikely(!vm_table))
		return -EINVAL;

	idx = find_free_vm_table_entry(host_kvm);
	if (idx < 0)
		return idx;

	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);

	/* VMID 0 is reserved for the host */
	atomic64_set(&mmu->vmid.id, idx + 1);

	mmu->arch = &hyp_vm->kvm.arch;
	mmu->pgt = &hyp_vm->pgt;

	vm_table[idx] = hyp_vm;
	return hyp_vm->kvm.arch.pkvm.handle;
}

/*
 * Deallocate and remove the VM table entry corresponding to the handle.
 */
static void remove_vm_table_entry(pkvm_handle_t handle)
{
	hyp_assert_lock_held(&vm_table_lock);
	vm_table[vm_handle_to_idx(handle)] = NULL;
}

static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
{
	return size_add(sizeof(struct pkvm_hyp_vm),
		size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
}

static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
{
	void *va = (void *)kern_hyp_va(host_va);

	if (!PAGE_ALIGNED(va))
		return NULL;

	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
				   PAGE_ALIGN(size) >> PAGE_SHIFT))
		return NULL;

	return va;
}

static void *map_donated_memory(unsigned long host_va, size_t size)
{
	void *va = map_donated_memory_noclear(host_va, size);

	if (va)
		memset(va, 0, size);

	return va;
}

static void __unmap_donated_memory(void *va, size_t size)
{
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
				       PAGE_ALIGN(size) >> PAGE_SHIFT));
}

static void unmap_donated_memory(void *va, size_t size)
{
	if (!va)
		return;

	memset(va, 0, size);
	__unmap_donated_memory(va, size);
}

static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size)
{
	if (!va)
		return;

	__unmap_donated_memory(va, size);
}

/*
 * Initialize the hypervisor copy of the protected VM state using the
 * memory donated by the host.
 *
 * Unmaps the donated memory from the host at stage 2.
 *
 * host_kvm: A pointer to the host's struct kvm.
 * vm_hva: The host va of the area being donated for the VM state.
 *	   Must be page aligned.
 * pgd_hva: The host va of the area being donated for the stage-2 PGD for
 *	    the VM. Must be page aligned. Its size is implied by the VM's
 *	    VTCR.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
		   unsigned long pgd_hva)
{
	struct pkvm_hyp_vm *hyp_vm = NULL;
	size_t vm_size, pgd_size;
	unsigned int nr_vcpus;
	void *pgd = NULL;
	int ret;

	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
	if (ret)
		return ret;

	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
	if (nr_vcpus < 1) {
		ret = -EINVAL;
		goto err_unpin_kvm;
	}

	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);

	ret = -ENOMEM;

	hyp_vm = map_donated_memory(vm_hva, vm_size);
	if (!hyp_vm)
		goto err_remove_mappings;

	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
	if (!pgd)
		goto err_remove_mappings;

	init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);

	hyp_spin_lock(&vm_table_lock);
	ret = insert_vm_table_entry(host_kvm, hyp_vm);
	if (ret < 0)
		goto err_unlock;

	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
	if (ret)
		goto err_remove_vm_table_entry;
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm->kvm.arch.pkvm.handle;

err_remove_vm_table_entry:
	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
err_unlock:
	hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
	unmap_donated_memory(hyp_vm, vm_size);
	unmap_donated_memory(pgd, pgd_size);
err_unpin_kvm:
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return ret;
}

/*
 * Initialize the hypervisor copy of the protected vCPU state using the
 * memory donated by the host.
 *
 * handle: The handle for the protected vm.
 * host_vcpu: A pointer to the corresponding host vcpu.
 * vcpu_hva: The host va of the area being donated for the vcpu state.
 *	     Must be page aligned. The size of the area must be equal to
 *	     the page-aligned size of 'struct pkvm_hyp_vcpu'.
 * Return 0 on success, negative error code on failure.
 */
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
		     unsigned long vcpu_hva)
{
	struct pkvm_hyp_vcpu *hyp_vcpu;
	struct pkvm_hyp_vm *hyp_vm;
	unsigned int idx;
	int ret;

	hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
	if (!hyp_vcpu)
		return -ENOMEM;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		ret = -ENOENT;
		goto unlock;
	}

	idx = hyp_vm->nr_vcpus;
	if (idx >= hyp_vm->kvm.created_vcpus) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
	if (ret)
		goto unlock;

	hyp_vm->vcpus[idx] = hyp_vcpu;
	hyp_vm->nr_vcpus++;
unlock:
	hyp_spin_unlock(&vm_table_lock);

	if (ret)
		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));

	return ret;
}

int __pkvm_teardown_vm(pkvm_handle_t handle)
{
	struct pkvm_hyp_vm *hyp_vm;
	struct kvm *host_kvm;
	unsigned int idx;
	size_t vm_size;
	int err;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (WARN_ON(hyp_page_count(hyp_vm))) {
		err = -EBUSY;
		goto err_unlock;
	}

	/* Ensure the VMID is clean before it can be reallocated */
	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
	remove_vm_table_entry(handle);
	hyp_spin_unlock(&vm_table_lock);

	/* Reclaim guest pages (including page-table pages) */
	reclaim_guest_pages(hyp_vm);
	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);

	/* Return the metadata pages to the host */
	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];

		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
	}

	host_kvm = hyp_vm->host_kvm;
	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
	unmap_donated_memory(hyp_vm, vm_size);
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return 0;

err_unlock:
	hyp_spin_unlock(&vm_table_lock);
	return err;
}
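/*
 * Illustrative sketch, not part of the upstream file: how the lifecycle
 * implemented above is expected to be sequenced, based only on the checks
 * in this file. The host normally drives this through hypercalls rather
 * than direct calls, and 'vm_hva', 'pgd_hva' and 'vcpu_hvas' below stand
 * for hypothetical page-aligned, donated buffers of the sizes documented
 * in the comments above.
 */
#if 0
static void example_pkvm_lifecycle(struct kvm *host_kvm,
				   unsigned long vm_hva, unsigned long pgd_hva,
				   unsigned long vcpu_hvas[])
{
	unsigned int i, nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
	pkvm_handle_t handle;
	int ret;

	/* Handles start at HANDLE_OFFSET (0x1000), one per VM table slot. */
	ret = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
	if (ret < 0)
		return;
	handle = ret;

	/*
	 * vCPUs must be initialized in vcpu_idx order: __pkvm_init_vcpu()
	 * uses hyp_vm->nr_vcpus as the next expected index and rejects a
	 * host vcpu whose vcpu_idx does not match it.
	 */
	for (i = 0; i < nr_vcpus; i++)
		WARN_ON(__pkvm_init_vcpu(handle, kvm_get_vcpu(host_kvm, i),
					 vcpu_hvas[i]));

	/*
	 * Teardown only succeeds once no hyp pages of the VM are still in
	 * use; otherwise __pkvm_teardown_vm() returns -EBUSY.
	 */
	WARN_ON(__pkvm_teardown_vm(handle));
}
#endif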