// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power-of-two multiple of
	 * PAGE_SIZE, so there should be no need to free any of the tail pages
	 * to make the allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_removed_table(void *addr, u32 level)
{
	kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_removed_table = host_s2_free_removed_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					  id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

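/*
 * Editorial note (illustrative, not from the original source):
 * kvm_host_prepare_stage2() only builds the (empty) host stage-2 and uses
 * VMID 0 for it; translation is not actually enabled until each CPU calls
 * __pkvm_prot_finalize() further down. Because the host stage-2 is an
 * identity map, prepare_host_vtcr() sizes the IPA space to the CPU's
 * reported physical address range: with ID_AA64MMFR0_EL1.PARange == 0b0101
 * (48-bit PAs), for example, phys_shift is 48 and the resulting VTCR_EL2
 * uses T0SZ = 64 - 48 = 16.
 */
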
static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	memset(p, 0, sizeof(*p));
	p->refcount = 1;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

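/*
 * Editorial note (illustrative, not from the original source): the guest
 * stage-2 allocators above are confined to per-VM memory. Pages come from
 * the per-VM pool seeded with the pgd pages handed to
 * kvm_guest_prepare_stage2() and, once that runs dry, from the memcache
 * passed in as 'mc'. reclaim_guest_pages() walks the same path in reverse:
 * each page left in the pool has its struct hyp_page metadata cleared, is
 * pushed onto the host-provided memcache, and is donated back to the host.
 */
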
int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = host_mmu.arch.vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

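/*
 * Editorial note (illustrative, not from the original source): when @addr
 * does not fall inside any memblock region, find_mem_range() returns NULL
 * and narrows *@range to the hole containing @addr, i.e. [end of the region
 * below, base of the region above). For example, with memory regions
 * [0x80000000, 0xc0000000) and [0x100000000, 0x180000000) (addresses made
 * up for illustration), a lookup of 0xd0000000 returns NULL with *@range
 * set to [0xc0000000, 0x100000000). This lets the MMIO fault path below map
 * the largest possible block without ever covering RAM.
 */
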
bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool addr_is_allowed_memory(phys_addr_t phys)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	reg = find_mem_range(phys, &range);

	return reg && !(reg->flags & MEMBLOCK_NOMAP);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u32 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte)
		return -EPERM;

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
		 !(kvm_level_supports_block_mapping(level) &&
		   range_included(&cur, range)));

	*range = cur;

	return 0;
}

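/*
 * Editorial note (illustrative, not from the original source):
 * host_stage2_adjust_range() refuses entries that are already valid
 * (-EAGAIN: another CPU handled the fault first) or that carry an ownership
 * annotation (-EPERM: the page no longer belongs to the host). Otherwise it
 * shrinks *@range to the largest naturally aligned block around @addr that
 * can be installed in a single page-table entry and that still fits within
 * the memblock region (or hole) found earlier. With 4K pages, for instance,
 * a fault whose surrounding 2MiB block lies entirely in RAM can be mapped
 * with one level-2 block; if that block straddles a region boundary, the
 * walk carries on down, in the worst case to a single page.
 */
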
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			       addr, size, &host_s2_pool, owner_id);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding losing the state because of side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	BUG_ON(!__get_fault_info(esr, &fault));

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

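/*
 * Editorial note (illustrative, not from the original source): HPFAR_EL2
 * holds the faulting IPA right-shifted by 12 in its FIPA field, which
 * starts at register bit 4, so masking with HPFAR_MASK and shifting left by
 * 8 reconstructs the page-aligned physical address used above. -EAGAIN is
 * tolerated because another CPU may have mapped the page between the fault
 * being taken and the host lock being acquired; in that case
 * host_stage2_adjust_range() finds a valid PTE, the handler returns, and
 * the host simply retries the access.
 */
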
struct pkvm_mem_transition {
	u64 nr_pages;

	struct {
		enum pkvm_component_id id;
		/* Address in the initiator's address space */
		u64 addr;

		union {
			struct {
				/* Address in the completer's address space */
				u64 completer_addr;
			} host;
			struct {
				u64 completer_addr;
			} hyp;
		};
	} initiator;

	struct {
		enum pkvm_component_id id;
	} completer;
};

struct pkvm_mem_share {
	const struct pkvm_mem_transition tx;
	const enum kvm_pgtable_prot completer_prot;
};

struct pkvm_mem_donation {
	const struct pkvm_mem_transition tx;
};

struct check_walk_data {
	enum pkvm_page_state desired;
	enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
		return -EINVAL;

	return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb = __check_page_state_visitor,
		.arg = data,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
{
	if (!kvm_pte_valid(pte) && pte)
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

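/*
 * Editorial note (illustrative, not from the original source): the
 * pkvm_page_state of a mapping is carried in the software bits of its prot
 * (see pkvm_mkstate()/pkvm_getstate()). For the host stage-2, an empty PTE
 * is reported as PKVM_PAGE_OWNED because the host owns all memory by
 * default and maps it lazily on fault, whereas an invalid but non-zero PTE
 * is an ownership annotation written by host_stage2_set_owner_locked() and
 * therefore reads back as PKVM_NOPAGE.
 */
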
static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = host_get_page_state,
	};

	hyp_assert_lock_held(&host_mmu.lock);
	return check_page_state_range(&host_mmu.pgt, addr, size, &d);
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);

	return host_stage2_idmap_locked(addr, size, prot);
}

static int host_request_owned_transition(u64 *completer_addr,
					 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_request_unshare(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_share(u64 *completer_addr,
			       const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_unshare(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_initiate_donation(u64 *completer_addr,
				  const struct pkvm_mem_transition *tx)
{
	u8 owner_id = tx->completer.id;
	u64 size = tx->nr_pages * PAGE_SIZE;

	*completer_addr = tx->initiator.host.completer_addr;
	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
}

static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HYP);
}

static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
				 enum pkvm_page_state state)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__host_ack_skip_pgtable_check(tx))
		return 0;

	return __host_check_page_state_range(addr, size, state);
}

static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
}

static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u8 host_id = tx->completer.id;

	return host_stage2_set_owner_locked(addr, size, host_id);
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static int hyp_request_donation(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.hyp.completer_addr;
	return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int hyp_initiate_donation(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret;

	*completer_addr = tx->initiator.hyp.completer_addr;
	ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
	return (ret != size) ? -EFAULT : 0;
}

static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HOST);
}

static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
			 enum kvm_pgtable_prot perms)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (perms != PAGE_HYP)
		return -EPERM;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
		return -EBUSY;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size,
					    PKVM_PAGE_SHARED_BORROWED);
}

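/*
 * Editorial note (illustrative, not from the original source): the
 * hyp_page_count() check above is what gives hyp_pin_shared_mem() its
 * teeth. While the hypervisor holds a reference on a page it borrowed from
 * the host, any attempt by the host to unshare it is refused with -EBUSY
 * until hyp_unpin_shared_mem() has dropped the reference.
 */
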
static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
			      enum kvm_pgtable_prot perms)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot;

	prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
	return pkvm_create_mappings_locked(start, end, prot);
}

static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);

	return (ret != size) ? -EFAULT : 0;
}

static int hyp_complete_donation(u64 addr,
				 const struct pkvm_mem_transition *tx)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);

	return pkvm_create_mappings_locked(start, end, prot);
}

static int check_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_share(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_share():
 *
 * The page owner grants access to another component with a given set
 * of permissions.
 *
 * Initiator: OWNED	=> SHARED_OWNED
 * Completer: NOPAGE	=> SHARED_BORROWED
 */
static int do_share(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_share(share);
	if (ret)
		return ret;

	return WARN_ON(__do_share(share));
}

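/*
 * Editorial note (illustrative, not from the original source): all
 * transitions follow the same check-then-commit pattern. check_*() verifies,
 * with every relevant component locked, that both ends of the transition
 * are in the expected state before __do_*() touches any page-table. A
 * failure in the commit phase therefore indicates a broken invariant rather
 * than a recoverable error, which is why it is only WARN_ON()'d instead of
 * being unwound.
 */
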
static int check_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_unshare(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_unshare(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_unshare():
 *
 * The page owner revokes access from another component for a range of
 * pages which were previously shared using do_share().
 *
 * Initiator: SHARED_OWNED	=> OWNED
 * Completer: SHARED_BORROWED	=> NOPAGE
 */
static int do_unshare(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_unshare(share);
	if (ret)
		return ret;

	return WARN_ON(__do_unshare(share));
}

static int check_donation(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_request_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_ack_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_ack_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_donate(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_donation(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_initiate_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_complete_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_complete_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_donate():
 *
 * The page owner transfers ownership to another component, losing access
 * as a consequence.
 *
 * Initiator: OWNED	=> NOPAGE
 * Completer: NOPAGE	=> OWNED
 */
static int do_donate(struct pkvm_mem_donation *donation)
{
	int ret;

	ret = check_donation(donation);
	if (ret)
		return ret;

	return WARN_ON(__do_donate(donation));
}

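/*
 * Editorial note (illustrative, not from the original source): summary of
 * the page-state machines driven by the entry points below.
 *
 *   share:   initiator OWNED        -> SHARED_OWNED, completer NOPAGE          -> SHARED_BORROWED
 *   unshare: initiator SHARED_OWNED -> OWNED,        completer SHARED_BORROWED -> NOPAGE
 *   donate:  initiator OWNED        -> NOPAGE,       completer NOPAGE          -> OWNED
 *
 * Every entry point takes the component locks in the same order, host
 * before hyp, so the transition checks and commits are serialized against
 * each other.
 */
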
int __pkvm_host_share_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = 1,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
		.completer_prot = PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_share(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = 1,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
		.completer_prot = PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_unshare(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HYP,
				.addr = hyp_addr,
				.hyp = {
					.completer_addr = host_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HOST,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

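/*
 * Editorial note (illustrative, not from the original source):
 * hyp_pin_shared_mem() and hyp_unpin_shared_mem() below let the hypervisor
 * take a reference on host-shared pages so that they cannot be unshared
 * underneath it (see the hyp_page_count() check in hyp_ack_unshare()). A
 * hypothetical caller dereferencing a host-provided buffer would do
 * something like:
 *
 *	if (!hyp_pin_shared_mem(buf, buf + size)) {
 *		...read or write the shared buffer...
 *		hyp_unpin_shared_mem(buf, buf + size);
 *	}
 *
 * where 'buf' and 'size' are illustrative names, not part of this file.
 */
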
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}