// SPDX-License-Identifier: GPL-2.0
/*
 * Secure pages management: Migration of pages between normal and secure
 * memory of KVM guests.
 *
 * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
 */

/*
 * A pseries guest can be run as secure guest on Ultravisor-enabled
 * POWER platforms. On such platforms, this driver will be used to manage
 * the movement of guest pages between the normal memory managed by
 * hypervisor (HV) and secure memory managed by Ultravisor (UV).
 *
 * The page-in or page-out requests from UV will come to HV as hcalls and
 * HV will call back into UV via ultracalls to satisfy these page requests.
 *
 * Private ZONE_DEVICE memory equal to the amount of secure memory
 * available in the platform for running secure guests is hotplugged.
 * Whenever a page belonging to the guest becomes secure, a page from this
 * private device memory is used to represent and track that secure page
 * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
 * shared between UV and HV. However such pages aren't represented by
 * device private memory and mappings to shared memory exist in both
 * UV and HV page tables.
 */

/*
 * Notes on locking
 *
 * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
 * page-in and page-out requests for the same GPA. Concurrent accesses
 * can either come via UV (guest vCPUs requesting for same page)
 * or when HV and guest simultaneously access the same page.
 * This mutex serializes the migration of page from HV(normal) to
 * UV(secure) and vice versa. So the serialization points are around
 * migrate_vma routines and page-in/out routines.
 *
 * Per-guest mutex comes with a cost though. Mainly it serializes the
 * fault path as page-out can occur when HV faults on accessing secure
 * guest pages.
 * Currently UV issues page-in requests for all the guest
 * PFNs one at a time during early boot (UV_ESM uvcall), so this is
 * not a cause for concern. Also currently the number of page-outs caused
 * by HV touching secure pages is very very low. If and when UV supports
 * overcommitting, then we might see concurrent guest driven page-outs.
 *
 * Locking order
 *
 * 1. kvm->srcu - Protects KVM memslots
 * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
 * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
 *			     as sync-points for page-in/out
 */

/*
 * Notes on page size
 *
 * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
 * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
 * secure GPAs at 64K page size and maintains one device PFN for each
 * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
 * for 64K page at a time.
 *
 * HV faulting on secure pages: When HV touches any secure page, it
 * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
 * UV splits and remaps the 2MB page if necessary and copies out the
 * required 64K page contents.
 *
 * Shared pages: Whenever guest shares a secure page, UV will split and
 * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
 *
 * HV invalidating a page: When a regular page belonging to secure
 * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
 * page size. Using 64K page size is correct here because any non-secure
 * page will essentially be of 64K page size. Splitting by UV during sharing
 * and page-out ensures this.
 *
 * Page fault handling: When HV handles page fault of a page belonging
 * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
80 * Using 64K size is correct here too as UV would have split the 2MB page 81 * into 64k mappings and would have done page-outs earlier. 82 * 83 * In summary, the current secure pages handling code in HV assumes 84 * 64K page size and in fact fails any page-in/page-out requests of 85 * non-64K size upfront. If and when UV starts supporting multiple 86 * page-sizes, we need to break this assumption. 87 */ 88 89 #include <linux/pagemap.h> 90 #include <linux/migrate.h> 91 #include <linux/kvm_host.h> 92 #include <linux/ksm.h> 93 #include <linux/of.h> 94 #include <linux/memremap.h> 95 #include <asm/ultravisor.h> 96 #include <asm/mman.h> 97 #include <asm/kvm_ppc.h> 98 #include <asm/kvm_book3s_uvmem.h> 99 100 static struct dev_pagemap kvmppc_uvmem_pgmap; 101 static unsigned long *kvmppc_uvmem_bitmap; 102 static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); 103 104 /* 105 * States of a GFN 106 * --------------- 107 * The GFN can be in one of the following states. 108 * 109 * (a) Secure - The GFN is secure. The GFN is associated with 110 * a Secure VM, the contents of the GFN is not accessible 111 * to the Hypervisor. This GFN can be backed by a secure-PFN, 112 * or can be backed by a normal-PFN with contents encrypted. 113 * The former is true when the GFN is paged-in into the 114 * ultravisor. The latter is true when the GFN is paged-out 115 * of the ultravisor. 116 * 117 * (b) Shared - The GFN is shared. The GFN is associated with a 118 * a secure VM. The contents of the GFN is accessible to 119 * Hypervisor. This GFN is backed by a normal-PFN and its 120 * content is un-encrypted. 121 * 122 * (c) Normal - The GFN is a normal. The GFN is associated with 123 * a normal VM. The contents of the GFN is accessible to 124 * the Hypervisor. Its content is never encrypted. 125 * 126 * States of a VM. 127 * --------------- 128 * 129 * Normal VM: A VM whose contents are always accessible to 130 * the hypervisor. All its GFNs are normal-GFNs. 
 *
 * Secure VM:  A VM whose contents are not accessible to the
 *	hypervisor without the VM's consent.  Its GFNs are
 *	either Shared-GFN or Secure-GFNs.
 *
 * Transient VM: A Normal VM that is transitioning to secure VM.
 *	The transition starts on successful return of
 *	H_SVM_INIT_START, and ends on successful return
 *	of H_SVM_INIT_DONE. This transient VM, can have GFNs
 *	in any of the three states; i.e Secure-GFN, Shared-GFN,
 *	and Normal-GFN.	The VM never executes in this state
 *	in supervisor-mode.
 *
 * Memory slot State.
 * -----------------------------
 *	The state of a memory slot mirrors the state of the
 *	VM the memory slot is associated with.
 *
 * VM State transition.
 * --------------------
 *
 *  A VM always starts in Normal Mode.
 *
 *  H_SVM_INIT_START moves the VM into transient state. During this
 *  time the Ultravisor may request some of its GFNs to be shared or
 *  secured. So its GFNs can be in one of the three GFN states.
 *
 *  H_SVM_INIT_DONE moves the VM entirely from transient state to
 *  secure-state. At this point any left-over normal-GFNs are
 *  transitioned to Secure-GFN.
 *
 *  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
 *  All its GFNs are moved to Normal-GFNs.
 *
 *  UV_TERMINATE transitions the secure-VM back to normal-VM. All
 *  the secure-GFN and shared-GFNs are transitioned to normal-GFN
 *  Note: The contents of the normal-GFN is undefined at this point.
 *
 * GFN state implementation:
 * -------------------------
 *
 * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
 * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
 * set, and contains the value of the secure-PFN.
 * It is associated with a normal-PFN; also called mem_pfn, when
 * the GFN is paged-out. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
177 * The value of the normal-PFN is not tracked. 178 * 179 * Shared GFN is associated with a normal-PFN. Its pfn[] has 180 * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN 181 * is not tracked. 182 * 183 * Normal GFN is associated with normal-PFN. Its pfn[] has 184 * no flag set. The value of the normal-PFN is not tracked. 185 * 186 * Life cycle of a GFN 187 * -------------------- 188 * 189 * -------------------------------------------------------------- 190 * | | Share | Unshare | SVM |H_SVM_INIT_DONE| 191 * | |operation |operation | abort/ | | 192 * | | | | terminate | | 193 * ------------------------------------------------------------- 194 * | | | | | | 195 * | Secure | Shared | Secure |Normal |Secure | 196 * | | | | | | 197 * | Shared | Shared | Secure |Normal |Shared | 198 * | | | | | | 199 * | Normal | Shared | Secure |Normal |Secure | 200 * -------------------------------------------------------------- 201 * 202 * Life cycle of a VM 203 * -------------------- 204 * 205 * -------------------------------------------------------------------- 206 * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | 207 * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | 208 * | | | | | | | 209 * --------- ---------------------------------------------------------- 210 * | | | | | | | 211 * | Normal | Normal | Transient|Error |Error |Normal | 212 * | | | | | | | 213 * | Secure | Error | Error |Error |Error |Normal | 214 * | | | | | | | 215 * |Transient| N/A | Error |Secure |Normal |Normal | 216 * -------------------------------------------------------------------- 217 */ 218 219 #define KVMPPC_GFN_UVMEM_PFN (1UL << 63) 220 #define KVMPPC_GFN_MEM_PFN (1UL << 62) 221 #define KVMPPC_GFN_SHARED (1UL << 61) 222 #define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) 223 #define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) 224 #define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) 225 226 struct kvmppc_uvmem_slot { 227 struct list_head list; 228 
unsigned long nr_pfns; 229 unsigned long base_pfn; 230 unsigned long *pfns; 231 }; 232 struct kvmppc_uvmem_page_pvt { 233 struct kvm *kvm; 234 unsigned long gpa; 235 bool skip_page_out; 236 bool remove_gfn; 237 }; 238 239 bool kvmppc_uvmem_available(void) 240 { 241 /* 242 * If kvmppc_uvmem_bitmap != NULL, then there is an ultravisor 243 * and our data structures have been initialized successfully. 244 */ 245 return !!kvmppc_uvmem_bitmap; 246 } 247 248 int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) 249 { 250 struct kvmppc_uvmem_slot *p; 251 252 p = kzalloc(sizeof(*p), GFP_KERNEL); 253 if (!p) 254 return -ENOMEM; 255 p->pfns = vcalloc(slot->npages, sizeof(*p->pfns)); 256 if (!p->pfns) { 257 kfree(p); 258 return -ENOMEM; 259 } 260 p->nr_pfns = slot->npages; 261 p->base_pfn = slot->base_gfn; 262 263 mutex_lock(&kvm->arch.uvmem_lock); 264 list_add(&p->list, &kvm->arch.uvmem_pfns); 265 mutex_unlock(&kvm->arch.uvmem_lock); 266 267 return 0; 268 } 269 270 /* 271 * All device PFNs are already released by the time we come here. 
272 */ 273 void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) 274 { 275 struct kvmppc_uvmem_slot *p, *next; 276 277 mutex_lock(&kvm->arch.uvmem_lock); 278 list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) { 279 if (p->base_pfn == slot->base_gfn) { 280 vfree(p->pfns); 281 list_del(&p->list); 282 kfree(p); 283 break; 284 } 285 } 286 mutex_unlock(&kvm->arch.uvmem_lock); 287 } 288 289 static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, 290 unsigned long flag, unsigned long uvmem_pfn) 291 { 292 struct kvmppc_uvmem_slot *p; 293 294 list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 295 if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 296 unsigned long index = gfn - p->base_pfn; 297 298 if (flag == KVMPPC_GFN_UVMEM_PFN) 299 p->pfns[index] = uvmem_pfn | flag; 300 else 301 p->pfns[index] = flag; 302 return; 303 } 304 } 305 } 306 307 /* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */ 308 static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, 309 unsigned long uvmem_pfn, struct kvm *kvm) 310 { 311 kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); 312 } 313 314 /* mark the GFN as secure-GFN associated with a memory-PFN. */ 315 static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) 316 { 317 kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); 318 } 319 320 /* mark the GFN as a shared GFN. */ 321 static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) 322 { 323 kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); 324 } 325 326 /* mark the GFN as a non-existent GFN. 
*/ 327 static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) 328 { 329 kvmppc_mark_gfn(gfn, kvm, 0, 0); 330 } 331 332 /* return true, if the GFN is a secure-GFN backed by a secure-PFN */ 333 static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, 334 unsigned long *uvmem_pfn) 335 { 336 struct kvmppc_uvmem_slot *p; 337 338 list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 339 if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 340 unsigned long index = gfn - p->base_pfn; 341 342 if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { 343 if (uvmem_pfn) 344 *uvmem_pfn = p->pfns[index] & 345 KVMPPC_GFN_PFN_MASK; 346 return true; 347 } else 348 return false; 349 } 350 } 351 return false; 352 } 353 354 /* 355 * starting from *gfn search for the next available GFN that is not yet 356 * transitioned to a secure GFN. return the value of that GFN in *gfn. If a 357 * GFN is found, return true, else return false 358 * 359 * Must be called with kvm->arch.uvmem_lock held. 360 */ 361 static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, 362 struct kvm *kvm, unsigned long *gfn) 363 { 364 struct kvmppc_uvmem_slot *p = NULL, *iter; 365 bool ret = false; 366 unsigned long i; 367 368 list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) 369 if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { 370 p = iter; 371 break; 372 } 373 if (!p) 374 return ret; 375 /* 376 * The code below assumes, one to one correspondence between 377 * kvmppc_uvmem_slot and memslot. 
378 */ 379 for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { 380 unsigned long index = i - p->base_pfn; 381 382 if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { 383 *gfn = i; 384 ret = true; 385 break; 386 } 387 } 388 return ret; 389 } 390 391 static int kvmppc_memslot_page_merge(struct kvm *kvm, 392 const struct kvm_memory_slot *memslot, bool merge) 393 { 394 unsigned long gfn = memslot->base_gfn; 395 unsigned long end, start = gfn_to_hva(kvm, gfn); 396 int ret = 0; 397 struct vm_area_struct *vma; 398 int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE; 399 400 if (kvm_is_error_hva(start)) 401 return H_STATE; 402 403 end = start + (memslot->npages << PAGE_SHIFT); 404 405 mmap_write_lock(kvm->mm); 406 do { 407 vma = find_vma_intersection(kvm->mm, start, end); 408 if (!vma) { 409 ret = H_STATE; 410 break; 411 } 412 ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, 413 merge_flag, &vma->vm_flags); 414 if (ret) { 415 ret = H_STATE; 416 break; 417 } 418 start = vma->vm_end; 419 } while (end > vma->vm_end); 420 421 mmap_write_unlock(kvm->mm); 422 return ret; 423 } 424 425 static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, 426 const struct kvm_memory_slot *memslot) 427 { 428 uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); 429 kvmppc_uvmem_slot_free(kvm, memslot); 430 kvmppc_memslot_page_merge(kvm, memslot, true); 431 } 432 433 static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, 434 const struct kvm_memory_slot *memslot) 435 { 436 int ret = H_PARAMETER; 437 438 if (kvmppc_memslot_page_merge(kvm, memslot, false)) 439 return ret; 440 441 if (kvmppc_uvmem_slot_init(kvm, memslot)) 442 goto out1; 443 444 ret = uv_register_mem_slot(kvm->arch.lpid, 445 memslot->base_gfn << PAGE_SHIFT, 446 memslot->npages * PAGE_SIZE, 447 0, memslot->id); 448 if (ret < 0) { 449 ret = H_PARAMETER; 450 goto out; 451 } 452 return 0; 453 out: 454 kvmppc_uvmem_slot_free(kvm, memslot); 455 out1: 456 kvmppc_memslot_page_merge(kvm, memslot, true); 457 return ret; 458 } 459 
460 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) 461 { 462 struct kvm_memslots *slots; 463 struct kvm_memory_slot *memslot, *m; 464 int ret = H_SUCCESS; 465 int srcu_idx, bkt; 466 467 kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START; 468 469 if (!kvmppc_uvmem_bitmap) 470 return H_UNSUPPORTED; 471 472 /* Only radix guests can be secure guests */ 473 if (!kvm_is_radix(kvm)) 474 return H_UNSUPPORTED; 475 476 /* NAK the transition to secure if not enabled */ 477 if (!kvm->arch.svm_enabled) 478 return H_AUTHORITY; 479 480 srcu_idx = srcu_read_lock(&kvm->srcu); 481 482 /* register the memslot */ 483 slots = kvm_memslots(kvm); 484 kvm_for_each_memslot(memslot, bkt, slots) { 485 ret = __kvmppc_uvmem_memslot_create(kvm, memslot); 486 if (ret) 487 break; 488 } 489 490 if (ret) { 491 slots = kvm_memslots(kvm); 492 kvm_for_each_memslot(m, bkt, slots) { 493 if (m == memslot) 494 break; 495 __kvmppc_uvmem_memslot_delete(kvm, memslot); 496 } 497 } 498 499 srcu_read_unlock(&kvm->srcu, srcu_idx); 500 return ret; 501 } 502 503 /* 504 * Provision a new page on HV side and copy over the contents 505 * from secure memory using UV_PAGE_OUT uvcall. 506 * Caller must held kvm->arch.uvmem_lock. 
507 */ 508 static int __kvmppc_svm_page_out(struct vm_area_struct *vma, 509 unsigned long start, 510 unsigned long end, unsigned long page_shift, 511 struct kvm *kvm, unsigned long gpa, struct page *fault_page) 512 { 513 unsigned long src_pfn, dst_pfn = 0; 514 struct migrate_vma mig = { 0 }; 515 struct page *dpage, *spage; 516 struct kvmppc_uvmem_page_pvt *pvt; 517 unsigned long pfn; 518 int ret = U_SUCCESS; 519 520 memset(&mig, 0, sizeof(mig)); 521 mig.vma = vma; 522 mig.start = start; 523 mig.end = end; 524 mig.src = &src_pfn; 525 mig.dst = &dst_pfn; 526 mig.pgmap_owner = &kvmppc_uvmem_pgmap; 527 mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; 528 mig.fault_page = fault_page; 529 530 /* The requested page is already paged-out, nothing to do */ 531 if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) 532 return ret; 533 534 ret = migrate_vma_setup(&mig); 535 if (ret) 536 return -1; 537 538 spage = migrate_pfn_to_page(*mig.src); 539 if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) 540 goto out_finalize; 541 542 if (!is_zone_device_page(spage)) 543 goto out_finalize; 544 545 dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); 546 if (!dpage) { 547 ret = -1; 548 goto out_finalize; 549 } 550 551 lock_page(dpage); 552 pvt = spage->zone_device_data; 553 pfn = page_to_pfn(dpage); 554 555 /* 556 * This function is used in two cases: 557 * - When HV touches a secure page, for which we do UV_PAGE_OUT 558 * - When a secure page is converted to shared page, we *get* 559 * the page to essentially unmap the device page. In this 560 * case we skip page-out. 
561 */ 562 if (!pvt->skip_page_out) 563 ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, 564 gpa, 0, page_shift); 565 566 if (ret == U_SUCCESS) 567 *mig.dst = migrate_pfn(pfn); 568 else { 569 unlock_page(dpage); 570 __free_page(dpage); 571 goto out_finalize; 572 } 573 574 migrate_vma_pages(&mig); 575 576 out_finalize: 577 migrate_vma_finalize(&mig); 578 return ret; 579 } 580 581 static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, 582 unsigned long start, unsigned long end, 583 unsigned long page_shift, 584 struct kvm *kvm, unsigned long gpa, 585 struct page *fault_page) 586 { 587 int ret; 588 589 mutex_lock(&kvm->arch.uvmem_lock); 590 ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, 591 fault_page); 592 mutex_unlock(&kvm->arch.uvmem_lock); 593 594 return ret; 595 } 596 597 /* 598 * Drop device pages that we maintain for the secure guest 599 * 600 * We first mark the pages to be skipped from UV_PAGE_OUT when there 601 * is HV side fault on these pages. Next we *get* these pages, forcing 602 * fault on them, do fault time migration to replace the device PTEs in 603 * QEMU page table with normal PTEs from newly allocated pages. 
604 */ 605 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, 606 struct kvm *kvm, bool skip_page_out) 607 { 608 int i; 609 struct kvmppc_uvmem_page_pvt *pvt; 610 struct page *uvmem_page; 611 struct vm_area_struct *vma = NULL; 612 unsigned long uvmem_pfn, gfn; 613 unsigned long addr; 614 615 mmap_read_lock(kvm->mm); 616 617 addr = slot->userspace_addr; 618 619 gfn = slot->base_gfn; 620 for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { 621 622 /* Fetch the VMA if addr is not in the latest fetched one */ 623 if (!vma || addr >= vma->vm_end) { 624 vma = vma_lookup(kvm->mm, addr); 625 if (!vma) { 626 pr_err("Can't find VMA for gfn:0x%lx\n", gfn); 627 break; 628 } 629 } 630 631 mutex_lock(&kvm->arch.uvmem_lock); 632 633 if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 634 uvmem_page = pfn_to_page(uvmem_pfn); 635 pvt = uvmem_page->zone_device_data; 636 pvt->skip_page_out = skip_page_out; 637 pvt->remove_gfn = true; 638 639 if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, 640 PAGE_SHIFT, kvm, pvt->gpa, NULL)) 641 pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", 642 pvt->gpa, addr); 643 } else { 644 /* Remove the shared flag if any */ 645 kvmppc_gfn_remove(gfn, kvm); 646 } 647 648 mutex_unlock(&kvm->arch.uvmem_lock); 649 } 650 651 mmap_read_unlock(kvm->mm); 652 } 653 654 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) 655 { 656 int srcu_idx, bkt; 657 struct kvm_memory_slot *memslot; 658 659 /* 660 * Expect to be called only after INIT_START and before INIT_DONE. 661 * If INIT_DONE was completed, use normal VM termination sequence. 
662 */ 663 if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 664 return H_UNSUPPORTED; 665 666 if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 667 return H_STATE; 668 669 srcu_idx = srcu_read_lock(&kvm->srcu); 670 671 kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm)) 672 kvmppc_uvmem_drop_pages(memslot, kvm, false); 673 674 srcu_read_unlock(&kvm->srcu, srcu_idx); 675 676 kvm->arch.secure_guest = 0; 677 uv_svm_terminate(kvm->arch.lpid); 678 679 return H_PARAMETER; 680 } 681 682 /* 683 * Get a free device PFN from the pool 684 * 685 * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device 686 * PFN will be used to keep track of the secure page on HV side. 687 * 688 * Called with kvm->arch.uvmem_lock held 689 */ 690 static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) 691 { 692 struct page *dpage = NULL; 693 unsigned long bit, uvmem_pfn; 694 struct kvmppc_uvmem_page_pvt *pvt; 695 unsigned long pfn_last, pfn_first; 696 697 pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT; 698 pfn_last = pfn_first + 699 (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT); 700 701 spin_lock(&kvmppc_uvmem_bitmap_lock); 702 bit = find_first_zero_bit(kvmppc_uvmem_bitmap, 703 pfn_last - pfn_first); 704 if (bit >= (pfn_last - pfn_first)) 705 goto out; 706 bitmap_set(kvmppc_uvmem_bitmap, bit, 1); 707 spin_unlock(&kvmppc_uvmem_bitmap_lock); 708 709 pvt = kzalloc(sizeof(*pvt), GFP_KERNEL); 710 if (!pvt) 711 goto out_clear; 712 713 uvmem_pfn = bit + pfn_first; 714 kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); 715 716 pvt->gpa = gpa; 717 pvt->kvm = kvm; 718 719 dpage = pfn_to_page(uvmem_pfn); 720 dpage->zone_device_data = pvt; 721 zone_device_page_init(dpage); 722 return dpage; 723 out_clear: 724 spin_lock(&kvmppc_uvmem_bitmap_lock); 725 bitmap_clear(kvmppc_uvmem_bitmap, bit, 1); 726 out: 727 spin_unlock(&kvmppc_uvmem_bitmap_lock); 728 return NULL; 729 } 730 731 /* 732 * Alloc a PFN from private device memory 
pool. If @pagein is true, 733 * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. 734 */ 735 static int kvmppc_svm_page_in(struct vm_area_struct *vma, 736 unsigned long start, 737 unsigned long end, unsigned long gpa, struct kvm *kvm, 738 unsigned long page_shift, 739 bool pagein) 740 { 741 unsigned long src_pfn, dst_pfn = 0; 742 struct migrate_vma mig = { 0 }; 743 struct page *spage; 744 unsigned long pfn; 745 struct page *dpage; 746 int ret = 0; 747 748 memset(&mig, 0, sizeof(mig)); 749 mig.vma = vma; 750 mig.start = start; 751 mig.end = end; 752 mig.src = &src_pfn; 753 mig.dst = &dst_pfn; 754 mig.flags = MIGRATE_VMA_SELECT_SYSTEM; 755 756 ret = migrate_vma_setup(&mig); 757 if (ret) 758 return ret; 759 760 if (!(*mig.src & MIGRATE_PFN_MIGRATE)) { 761 ret = -1; 762 goto out_finalize; 763 } 764 765 dpage = kvmppc_uvmem_get_page(gpa, kvm); 766 if (!dpage) { 767 ret = -1; 768 goto out_finalize; 769 } 770 771 if (pagein) { 772 pfn = *mig.src >> MIGRATE_PFN_SHIFT; 773 spage = migrate_pfn_to_page(*mig.src); 774 if (spage) { 775 ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, 776 gpa, 0, page_shift); 777 if (ret) 778 goto out_finalize; 779 } 780 } 781 782 *mig.dst = migrate_pfn(page_to_pfn(dpage)); 783 migrate_vma_pages(&mig); 784 out_finalize: 785 migrate_vma_finalize(&mig); 786 return ret; 787 } 788 789 static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, 790 const struct kvm_memory_slot *memslot) 791 { 792 unsigned long gfn = memslot->base_gfn; 793 struct vm_area_struct *vma; 794 unsigned long start, end; 795 int ret = 0; 796 797 mmap_read_lock(kvm->mm); 798 mutex_lock(&kvm->arch.uvmem_lock); 799 while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { 800 ret = H_STATE; 801 start = gfn_to_hva(kvm, gfn); 802 if (kvm_is_error_hva(start)) 803 break; 804 805 end = start + (1UL << PAGE_SHIFT); 806 vma = find_vma_intersection(kvm->mm, start, end); 807 if (!vma || vma->vm_start > start || vma->vm_end < end) 808 break; 809 810 ret = 
kvmppc_svm_page_in(vma, start, end, 811 (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); 812 if (ret) { 813 ret = H_STATE; 814 break; 815 } 816 817 /* relinquish the cpu if needed */ 818 cond_resched(); 819 } 820 mutex_unlock(&kvm->arch.uvmem_lock); 821 mmap_read_unlock(kvm->mm); 822 return ret; 823 } 824 825 unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) 826 { 827 struct kvm_memslots *slots; 828 struct kvm_memory_slot *memslot; 829 int srcu_idx, bkt; 830 long ret = H_SUCCESS; 831 832 if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 833 return H_UNSUPPORTED; 834 835 /* migrate any unmoved normal pfn to device pfns*/ 836 srcu_idx = srcu_read_lock(&kvm->srcu); 837 slots = kvm_memslots(kvm); 838 kvm_for_each_memslot(memslot, bkt, slots) { 839 ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); 840 if (ret) { 841 /* 842 * The pages will remain transitioned. 843 * Its the callers responsibility to 844 * terminate the VM, which will undo 845 * all state of the VM. Till then 846 * this VM is in a erroneous state. 847 * Its KVMPPC_SECURE_INIT_DONE will 848 * remain unset. 849 */ 850 ret = H_STATE; 851 goto out; 852 } 853 } 854 855 kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; 856 pr_info("LPID %d went secure\n", kvm->arch.lpid); 857 858 out: 859 srcu_read_unlock(&kvm->srcu, srcu_idx); 860 return ret; 861 } 862 863 /* 864 * Shares the page with HV, thus making it a normal page. 865 * 866 * - If the page is already secure, then provision a new page and share 867 * - If the page is a normal page, share the existing page 868 * 869 * In the former case, uses dev_pagemap_ops.migrate_to_ram handler 870 * to unmap the device page from QEMU's page tables. 
871 */ 872 static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, 873 unsigned long page_shift) 874 { 875 876 int ret = H_PARAMETER; 877 struct page *uvmem_page; 878 struct kvmppc_uvmem_page_pvt *pvt; 879 unsigned long pfn; 880 unsigned long gfn = gpa >> page_shift; 881 int srcu_idx; 882 unsigned long uvmem_pfn; 883 884 srcu_idx = srcu_read_lock(&kvm->srcu); 885 mutex_lock(&kvm->arch.uvmem_lock); 886 if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 887 uvmem_page = pfn_to_page(uvmem_pfn); 888 pvt = uvmem_page->zone_device_data; 889 pvt->skip_page_out = true; 890 /* 891 * do not drop the GFN. It is a valid GFN 892 * that is transitioned to a shared GFN. 893 */ 894 pvt->remove_gfn = false; 895 } 896 897 retry: 898 mutex_unlock(&kvm->arch.uvmem_lock); 899 pfn = gfn_to_pfn(kvm, gfn); 900 if (is_error_noslot_pfn(pfn)) 901 goto out; 902 903 mutex_lock(&kvm->arch.uvmem_lock); 904 if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 905 uvmem_page = pfn_to_page(uvmem_pfn); 906 pvt = uvmem_page->zone_device_data; 907 pvt->skip_page_out = true; 908 pvt->remove_gfn = false; /* it continues to be a valid GFN */ 909 kvm_release_pfn_clean(pfn); 910 goto retry; 911 } 912 913 if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, 914 page_shift)) { 915 kvmppc_gfn_shared(gfn, kvm); 916 ret = H_SUCCESS; 917 } 918 kvm_release_pfn_clean(pfn); 919 mutex_unlock(&kvm->arch.uvmem_lock); 920 out: 921 srcu_read_unlock(&kvm->srcu, srcu_idx); 922 return ret; 923 } 924 925 /* 926 * H_SVM_PAGE_IN: Move page from normal memory to secure memory. 927 * 928 * H_PAGE_IN_SHARED flag makes the page shared which means that the same 929 * memory in is visible from both UV and HV. 
930 */ 931 unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, 932 unsigned long flags, 933 unsigned long page_shift) 934 { 935 unsigned long start, end; 936 struct vm_area_struct *vma; 937 int srcu_idx; 938 unsigned long gfn = gpa >> page_shift; 939 int ret; 940 941 if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 942 return H_UNSUPPORTED; 943 944 if (page_shift != PAGE_SHIFT) 945 return H_P3; 946 947 if (flags & ~H_PAGE_IN_SHARED) 948 return H_P2; 949 950 if (flags & H_PAGE_IN_SHARED) 951 return kvmppc_share_page(kvm, gpa, page_shift); 952 953 ret = H_PARAMETER; 954 srcu_idx = srcu_read_lock(&kvm->srcu); 955 mmap_read_lock(kvm->mm); 956 957 start = gfn_to_hva(kvm, gfn); 958 if (kvm_is_error_hva(start)) 959 goto out; 960 961 mutex_lock(&kvm->arch.uvmem_lock); 962 /* Fail the page-in request of an already paged-in page */ 963 if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) 964 goto out_unlock; 965 966 end = start + (1UL << page_shift); 967 vma = find_vma_intersection(kvm->mm, start, end); 968 if (!vma || vma->vm_start > start || vma->vm_end < end) 969 goto out_unlock; 970 971 if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, 972 true)) 973 goto out_unlock; 974 975 ret = H_SUCCESS; 976 977 out_unlock: 978 mutex_unlock(&kvm->arch.uvmem_lock); 979 out: 980 mmap_read_unlock(kvm->mm); 981 srcu_read_unlock(&kvm->srcu, srcu_idx); 982 return ret; 983 } 984 985 986 /* 987 * Fault handler callback that gets called when HV touches any page that 988 * has been moved to secure memory, we ask UV to give back the page by 989 * issuing UV_PAGE_OUT uvcall. 990 * 991 * This eventually results in dropping of device PFN and the newly 992 * provisioned page/PFN gets populated in QEMU page tables. 
993 */ 994 static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) 995 { 996 struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data; 997 998 if (kvmppc_svm_page_out(vmf->vma, vmf->address, 999 vmf->address + PAGE_SIZE, PAGE_SHIFT, 1000 pvt->kvm, pvt->gpa, vmf->page)) 1001 return VM_FAULT_SIGBUS; 1002 else 1003 return 0; 1004 } 1005 1006 /* 1007 * Release the device PFN back to the pool 1008 * 1009 * Gets called when secure GFN tranistions from a secure-PFN 1010 * to a normal PFN during H_SVM_PAGE_OUT. 1011 * Gets called with kvm->arch.uvmem_lock held. 1012 */ 1013 static void kvmppc_uvmem_page_free(struct page *page) 1014 { 1015 unsigned long pfn = page_to_pfn(page) - 1016 (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT); 1017 struct kvmppc_uvmem_page_pvt *pvt; 1018 1019 spin_lock(&kvmppc_uvmem_bitmap_lock); 1020 bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1); 1021 spin_unlock(&kvmppc_uvmem_bitmap_lock); 1022 1023 pvt = page->zone_device_data; 1024 page->zone_device_data = NULL; 1025 if (pvt->remove_gfn) 1026 kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 1027 else 1028 kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 1029 kfree(pvt); 1030 } 1031 1032 static const struct dev_pagemap_ops kvmppc_uvmem_ops = { 1033 .page_free = kvmppc_uvmem_page_free, 1034 .migrate_to_ram = kvmppc_uvmem_migrate_to_ram, 1035 }; 1036 1037 /* 1038 * H_SVM_PAGE_OUT: Move page from secure memory to normal memory. 
1039 */ 1040 unsigned long 1041 kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, 1042 unsigned long flags, unsigned long page_shift) 1043 { 1044 unsigned long gfn = gpa >> page_shift; 1045 unsigned long start, end; 1046 struct vm_area_struct *vma; 1047 int srcu_idx; 1048 int ret; 1049 1050 if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 1051 return H_UNSUPPORTED; 1052 1053 if (page_shift != PAGE_SHIFT) 1054 return H_P3; 1055 1056 if (flags) 1057 return H_P2; 1058 1059 ret = H_PARAMETER; 1060 srcu_idx = srcu_read_lock(&kvm->srcu); 1061 mmap_read_lock(kvm->mm); 1062 start = gfn_to_hva(kvm, gfn); 1063 if (kvm_is_error_hva(start)) 1064 goto out; 1065 1066 end = start + (1UL << page_shift); 1067 vma = find_vma_intersection(kvm->mm, start, end); 1068 if (!vma || vma->vm_start > start || vma->vm_end < end) 1069 goto out; 1070 1071 if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL)) 1072 ret = H_SUCCESS; 1073 out: 1074 mmap_read_unlock(kvm->mm); 1075 srcu_read_unlock(&kvm->srcu, srcu_idx); 1076 return ret; 1077 } 1078 1079 int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) 1080 { 1081 unsigned long pfn; 1082 int ret = U_SUCCESS; 1083 1084 pfn = gfn_to_pfn(kvm, gfn); 1085 if (is_error_noslot_pfn(pfn)) 1086 return -EFAULT; 1087 1088 mutex_lock(&kvm->arch.uvmem_lock); 1089 if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) 1090 goto out; 1091 1092 ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT, 1093 0, PAGE_SHIFT); 1094 out: 1095 kvm_release_pfn_clean(pfn); 1096 mutex_unlock(&kvm->arch.uvmem_lock); 1097 return (ret == U_SUCCESS) ? 
RESUME_GUEST : -EFAULT; 1098 } 1099 1100 int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) 1101 { 1102 int ret = __kvmppc_uvmem_memslot_create(kvm, new); 1103 1104 if (!ret) 1105 ret = kvmppc_uv_migrate_mem_slot(kvm, new); 1106 1107 return ret; 1108 } 1109 1110 void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) 1111 { 1112 __kvmppc_uvmem_memslot_delete(kvm, old); 1113 } 1114 1115 static u64 kvmppc_get_secmem_size(void) 1116 { 1117 struct device_node *np; 1118 int i, len; 1119 const __be32 *prop; 1120 u64 size = 0; 1121 1122 /* 1123 * First try the new ibm,secure-memory nodes which supersede the 1124 * secure-memory-ranges property. 1125 * If we found some, no need to read the deprecated ones. 1126 */ 1127 for_each_compatible_node(np, NULL, "ibm,secure-memory") { 1128 prop = of_get_property(np, "reg", &len); 1129 if (!prop) 1130 continue; 1131 size += of_read_number(prop + 2, 2); 1132 } 1133 if (size) 1134 return size; 1135 1136 np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); 1137 if (!np) 1138 goto out; 1139 1140 prop = of_get_property(np, "secure-memory-ranges", &len); 1141 if (!prop) 1142 goto out_put; 1143 1144 for (i = 0; i < len / (sizeof(*prop) * 4); i++) 1145 size += of_read_number(prop + (i * 4) + 2, 2); 1146 1147 out_put: 1148 of_node_put(np); 1149 out: 1150 return size; 1151 } 1152 1153 int kvmppc_uvmem_init(void) 1154 { 1155 int ret = 0; 1156 unsigned long size; 1157 struct resource *res; 1158 void *addr; 1159 unsigned long pfn_last, pfn_first; 1160 1161 size = kvmppc_get_secmem_size(); 1162 if (!size) { 1163 /* 1164 * Don't fail the initialization of kvm-hv module if 1165 * the platform doesn't export ibm,uv-firmware node. 1166 * Let normal guests run on such PEF-disabled platform. 
1167 */ 1168 pr_info("KVMPPC-UVMEM: No support for secure guests\n"); 1169 goto out; 1170 } 1171 1172 res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem"); 1173 if (IS_ERR(res)) { 1174 ret = PTR_ERR(res); 1175 goto out; 1176 } 1177 1178 kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; 1179 kvmppc_uvmem_pgmap.range.start = res->start; 1180 kvmppc_uvmem_pgmap.range.end = res->end; 1181 kvmppc_uvmem_pgmap.nr_range = 1; 1182 kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; 1183 /* just one global instance: */ 1184 kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; 1185 addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); 1186 if (IS_ERR(addr)) { 1187 ret = PTR_ERR(addr); 1188 goto out_free_region; 1189 } 1190 1191 pfn_first = res->start >> PAGE_SHIFT; 1192 pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); 1193 kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), 1194 sizeof(unsigned long), GFP_KERNEL); 1195 if (!kvmppc_uvmem_bitmap) { 1196 ret = -ENOMEM; 1197 goto out_unmap; 1198 } 1199 1200 pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size); 1201 return ret; 1202 out_unmap: 1203 memunmap_pages(&kvmppc_uvmem_pgmap); 1204 out_free_region: 1205 release_mem_region(res->start, size); 1206 out: 1207 return ret; 1208 } 1209 1210 void kvmppc_uvmem_free(void) 1211 { 1212 if (!kvmppc_uvmem_bitmap) 1213 return; 1214 1215 memunmap_pages(&kvmppc_uvmem_pgmap); 1216 release_mem_region(kvmppc_uvmem_pgmap.range.start, 1217 range_len(&kvmppc_uvmem_pgmap.range)); 1218 kfree(kvmppc_uvmem_bitmap); 1219 } 1220