// SPDX-License-Identifier: GPL-2.0
/*
 * Secure pages management: Migration of pages between normal and secure
 * memory of KVM guests.
 *
 * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
 */

/*
 * A pseries guest can be run as a secure guest on Ultravisor-enabled
 * POWER platforms. On such platforms, this driver will be used to manage
 * the movement of guest pages between the normal memory managed by
 * the hypervisor (HV) and secure memory managed by the Ultravisor (UV).
 *
 * The page-in or page-out requests from UV will come to HV as hcalls and
 * HV will call back into UV via ultracalls to satisfy these page requests.
 *
 * Private ZONE_DEVICE memory equal to the amount of secure memory
 * available in the platform for running secure guests is hotplugged.
 * Whenever a page belonging to the guest becomes secure, a page from this
 * private device memory is used to represent and track that secure page
 * on the HV side. Some pages (like virtio buffers, VPA pages etc.) are
 * shared between UV and HV. However, such pages aren't represented by
 * device private memory and mappings to shared memory exist in both
 * UV and HV page tables.
 */

/*
 * Notes on locking
 *
 * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
 * page-in and page-out requests for the same GPA. Concurrent accesses
 * can either come via UV (guest vCPUs requesting the same page)
 * or when HV and the guest simultaneously access the same page.
 * This mutex serializes the migration of a page from HV (normal) to
 * UV (secure) and vice versa. So the serialization points are around
 * the migrate_vma routines and the page-in/out routines.
 *
 * The per-guest mutex comes with a cost though. Mainly, it serializes
 * the fault path, as page-out can occur when HV faults on accessing secure
 * guest pages. Currently UV issues page-in requests for all the guest
 * PFNs one at a time during early boot (UV_ESM uvcall), so this is
 * not a cause for concern. Also, currently the number of page-outs caused
 * by HV touching secure pages is very low. If and when UV supports
 * overcommitting, then we might see concurrent guest-driven page-outs.
 *
 * Locking order
 *
 * 1. kvm->srcu - Protects KVM memslots
 * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
 * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
 *    as sync-points for page-in/out
 */

/*
 * Notes on page size
 *
 * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
 * and H_SVM_PAGE_OUT hcalls at PAGE_SIZE (64K) granularity. HV tracks
 * secure GPAs at 64K page size and maintains one device PFN for each
 * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
 * for one 64K page at a time.
 *
 * HV faulting on secure pages: When HV touches any secure page, it
 * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
 * UV splits and remaps the 2MB page if necessary and copies out the
 * required 64K page contents.
 *
 * Shared pages: Whenever the guest shares a secure page, UV will split and
 * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
 *
 * HV invalidating a page: When a regular page belonging to a secure
 * guest gets unmapped, HV informs UV with a UV_PAGE_INVAL of 64K
 * page size. Using 64K page size is correct here because any non-secure
 * page will essentially be of 64K page size. Splitting by UV during sharing
 * and page-out ensures this.
 *
 * Page fault handling: When HV handles a page fault of a page belonging
 * to a secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
 * Using 64K size is correct here too as UV would have split the 2MB page
 * into 64K mappings and would have done page-outs earlier.
 *
 * In summary, the current secure pages handling code in HV assumes
 * 64K page size and in fact fails any page-in/page-out requests of
 * non-64K size upfront. If and when UV starts supporting multiple
 * page sizes, we need to break this assumption.
 */
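
/*
 * Illustrative call flow (a simplified sketch based on the handlers in
 * this file; error handling and the shared-page case are omitted):
 *
 * Page-in, driven by UV issuing H_SVM_PAGE_IN:
 *
 *	kvmppc_h_svm_page_in()
 *	  srcu_read_lock(&kvm->srcu)		// memslots
 *	  down_write(&kvm->mm->mmap_sem)	// for ksm_madvise()
 *	  mutex_lock(&kvm->arch.uvmem_lock)	// per-guest sync point
 *	    kvmppc_svm_page_in()
 *	      migrate_vma_setup()
 *	      kvmppc_uvmem_get_page()		// device PFN tracks secure GPA
 *	      uv_page_in()			// copy contents into UV
 *	      migrate_vma_pages() / migrate_vma_finalize()
 *
 * Page-out is the mirror image: an HV fault on a device private PTE ends
 * up in kvmppc_uvmem_migrate_to_ram() -> kvmppc_svm_page_out(), which
 * allocates a normal page and pulls the contents back with uv_page_out().
 */
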
#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>

static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);

#define KVMPPC_UVMEM_PFN (1UL << 63)

struct kvmppc_uvmem_slot {
	struct list_head list;
	unsigned long nr_pfns;
	unsigned long base_pfn;
	unsigned long *pfns;
};

struct kvmppc_uvmem_page_pvt {
	struct kvm *kvm;
	unsigned long gpa;
	bool skip_page_out;
};

int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
	if (!p->pfns) {
		kfree(p);
		return -ENOMEM;
	}
	p->nr_pfns = slot->npages;
	p->base_pfn = slot->base_gfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_add(&p->list, &kvm->arch.uvmem_pfns);
	mutex_unlock(&kvm->arch.uvmem_lock);

	return 0;
}
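
/*
 * GFN <-> device PFN bookkeeping (an illustrative note): each memslot gets
 * a kvmppc_uvmem_slot carrying one unsigned long per GFN. When a guest page
 * becomes secure, its entry holds the device PFN standing in for it, tagged
 * with KVMPPC_UVMEM_PFN (bit 63). For example, if a memslot has base_pfn
 * 0x1000 and the page at gfn 0x1005 is secured with device PFN 0x80042,
 * then
 *
 *	p->pfns[0x5] == 0x80042 | KVMPPC_UVMEM_PFN
 *
 * kvmppc_gfn_is_uvmem_pfn() below checks the tag bit and masks it off to
 * recover the device PFN. The example values are made up for illustration.
 */
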
/*
 * All device PFNs are already released by the time we come here.
 */
void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p, *next;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
		if (p->base_pfn == slot->base_gfn) {
			vfree(p->pfns);
			list_del(&p->list);
			kfree(p);
			break;
		}
	}
	mutex_unlock(&kvm->arch.uvmem_lock);
}

static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
				    struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
			return;
		}
	}
}

static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			p->pfns[gfn - p->base_pfn] = 0;
			return;
		}
	}
}

static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
				    unsigned long *uvmem_pfn)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
				if (uvmem_pfn)
					*uvmem_pfn = p->pfns[index] &
						     ~KVMPPC_UVMEM_PFN;
				return true;
			} else
				return false;
		}
	}
	return false;
}

unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = H_SUCCESS;
	int srcu_idx;

	if (!kvmppc_uvmem_bitmap)
		return H_UNSUPPORTED;

	/* Only radix guests can be secure guests */
	if (!kvm_is_radix(kvm))
		return H_UNSUPPORTED;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
			ret = H_PARAMETER;
			goto out;
		}
		ret = uv_register_mem_slot(kvm->arch.lpid,
					   memslot->base_gfn << PAGE_SHIFT,
					   memslot->npages * PAGE_SIZE,
					   0, memslot->id);
		if (ret < 0) {
			kvmppc_uvmem_slot_free(kvm, memslot);
			ret = H_PARAMETER;
			goto out;
		}
	}
	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
	pr_info("LPID %d went secure\n", kvm->arch.lpid);
	return H_SUCCESS;
}
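
/*
 * A rough sketch of the transition to secure mode, pieced together from
 * the handlers in this file (simplified; the exact ordering is driven by
 * UV):
 *
 *	H_SVM_INIT_START	register all memslots with UV, set
 *				KVMPPC_SECURE_INIT_START
 *	H_SVM_PAGE_IN ...	guest pages are pulled into secure memory
 *				one at a time (while handling the UV_ESM
 *				uvcall at early boot)
 *	H_SVM_INIT_DONE		set KVMPPC_SECURE_INIT_DONE
 *
 * If the transition fails before INIT_DONE, H_SVM_INIT_ABORT (below)
 * drops the device pages and terminates the SVM on the UV side.
 */
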
/*
 * Drop device pages that we maintain for the secure guest
 *
 * We first mark the pages to be skipped from UV_PAGE_OUT when there
 * is an HV-side fault on these pages. Next we *get* these pages, forcing
 * a fault on them, and do fault-time migration to replace the device PTEs
 * in the QEMU page table with normal PTEs from newly allocated pages.
 */
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
			     struct kvm *kvm, bool skip_page_out)
{
	int i;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn, uvmem_pfn;
	unsigned long gfn = free->base_gfn;

	for (i = free->npages; i; --i, ++gfn) {
		struct page *uvmem_page;

		mutex_lock(&kvm->arch.uvmem_lock);
		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
			mutex_unlock(&kvm->arch.uvmem_lock);
			continue;
		}

		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = skip_page_out;
		mutex_unlock(&kvm->arch.uvmem_lock);

		pfn = gfn_to_pfn(kvm, gfn);
		if (is_error_noslot_pfn(pfn))
			continue;
		kvm_release_pfn_clean(pfn);
	}
}

unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
{
	int srcu_idx;
	struct kvm_memory_slot *memslot;

	/*
	 * Expect to be called only after INIT_START and before INIT_DONE.
	 * If INIT_DONE was completed, use normal VM termination sequence.
	 */
	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return H_STATE;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmppc_uvmem_drop_pages(memslot, kvm, false);

	srcu_read_unlock(&kvm->srcu, srcu_idx);

	kvm->arch.secure_guest = 0;
	uv_svm_terminate(kvm->arch.lpid);

	return H_PARAMETER;
}

/*
 * Get a free device PFN from the pool
 *
 * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
 * PFN will be used to keep track of the secure page on HV side.
 *
 * Called with kvm->arch.uvmem_lock held
 */
static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
{
	struct page *dpage = NULL;
	unsigned long bit, uvmem_pfn;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn_last, pfn_first;

	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
	pfn_last = pfn_first +
		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
				  pfn_last - pfn_first);
	if (bit >= (pfn_last - pfn_first))
		goto out;
	bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
	if (!pvt)
		goto out_clear;

	uvmem_pfn = bit + pfn_first;
	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);

	pvt->gpa = gpa;
	pvt->kvm = kvm;

	dpage = pfn_to_page(uvmem_pfn);
	dpage->zone_device_data = pvt;
	get_page(dpage);
	lock_page(dpage);
	return dpage;
out_clear:
	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
out:
	spin_unlock(&kvmppc_uvmem_bitmap_lock);
	return NULL;
}
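
/*
 * Device PFN pool bookkeeping (an illustrative summary): kvmppc_uvmem_bitmap
 * has one bit per device PFN in the ZONE_DEVICE range carved out at
 * kvmppc_uvmem_init() time, indexed by (pfn - pfn_first). The bit is set in
 * kvmppc_uvmem_get_page() above when a device page starts standing in for a
 * secure GPA, and cleared again in kvmppc_uvmem_page_free() once the secure
 * page has been paged out and the device page is freed back to the pool.
 */
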
/*
 * Alloc a PFN from the private device memory pool and copy the page from
 * normal memory to secure memory using the UV_PAGE_IN uvcall.
 */
static int
kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
		   unsigned long end, unsigned long gpa, struct kvm *kvm,
		   unsigned long page_shift, bool *downgrade)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *spage;
	unsigned long pfn;
	struct page *dpage;
	int ret = 0;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	/*
	 * We come here with mmap_sem write lock held just for
	 * ksm_madvise(), otherwise we only need read mmap_sem.
	 * Hence downgrade to read lock once ksm_madvise() is done.
	 */
	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vma->vm_flags);
	downgrade_write(&kvm->mm->mmap_sem);
	*downgrade = true;
	if (ret)
		return ret;

	ret = migrate_vma_setup(&mig);
	if (ret)
		return ret;

	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
		ret = -1;
		goto out_finalize;
	}

	dpage = kvmppc_uvmem_get_page(gpa, kvm);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
	spage = migrate_pfn_to_page(*mig.src);
	if (spage)
		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
			   page_shift);

	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
	return ret;
}

/*
 * Shares the page with HV, thus making it a normal page.
 *
 * - If the page is already secure, then provision a new page and share
 * - If the page is a normal page, share the existing page
 *
 * In the former case, uses the dev_pagemap_ops.migrate_to_ram handler
 * to unmap the device page from QEMU's page tables.
 */
static unsigned long
kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
{
	int ret = H_PARAMETER;
	struct page *uvmem_page;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn;
	unsigned long gfn = gpa >> page_shift;
	int srcu_idx;
	unsigned long uvmem_pfn;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = true;
	}

retry:
	mutex_unlock(&kvm->arch.uvmem_lock);
	pfn = gfn_to_pfn(kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		goto out;

	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = true;
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
		ret = H_SUCCESS;
	kvm_release_pfn_clean(pfn);
	mutex_unlock(&kvm->arch.uvmem_lock);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}
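
/*
 * Sharing flow, in brief (an illustrative summary of the code above): if
 * the GPA is currently backed by a device (secure) page, mark it with
 * skip_page_out and *get* the page via gfn_to_pfn(). That fault triggers
 * kvmppc_uvmem_migrate_to_ram(), which replaces the device PTE with a
 * freshly allocated normal page but, because of skip_page_out, does not
 * copy the old secure contents out. The resulting normal PFN is then
 * handed to UV with uv_page_in(), so that mappings to the same shared
 * memory exist in both UV and HV page tables.
 */
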
/*
 * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
 *
 * The H_PAGE_IN_SHARED flag makes the page shared, which means that the
 * same memory is visible to both UV and HV.
 */
unsigned long
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
		     unsigned long flags, unsigned long page_shift)
{
	bool downgrade = false;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	unsigned long gfn = gpa >> page_shift;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags & ~H_PAGE_IN_SHARED)
		return H_P2;

	if (flags & H_PAGE_IN_SHARED)
		return kvmppc_share_page(kvm, gpa, page_shift);

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_write(&kvm->mm->mmap_sem);

	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* Fail the page-in request of an already paged-in page */
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
		goto out_unlock;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out_unlock;

	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
				&downgrade))
		ret = H_SUCCESS;
out_unlock:
	mutex_unlock(&kvm->arch.uvmem_lock);
out:
	if (downgrade)
		up_read(&kvm->mm->mmap_sem);
	else
		up_write(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}
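
/*
 * A note on the mmap_sem dance above (summary, not new behaviour):
 * kvmppc_h_svm_page_in() takes mmap_sem for write only because
 * ksm_madvise() requires it. kvmppc_svm_page_in() downgrades it to a
 * read lock as soon as ksm_madvise() returns and reports that through
 * *downgrade, so the caller knows whether to drop the lock with
 * up_read() or up_write() on the way out.
 */
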
/*
 * Provision a new page on the HV side and copy over the contents
 * from secure memory using the UV_PAGE_OUT uvcall.
 */
static int
kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
		    unsigned long end, unsigned long page_shift,
		    struct kvm *kvm, unsigned long gpa)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *dpage, *spage;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn;
	int ret = U_SUCCESS;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* The requested page is already paged-out, nothing to do */
	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
		goto out;

	ret = migrate_vma_setup(&mig);
	if (ret)
		goto out;

	spage = migrate_pfn_to_page(*mig.src);
	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
		goto out_finalize;

	if (!is_zone_device_page(spage))
		goto out_finalize;

	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	lock_page(dpage);
	pvt = spage->zone_device_data;
	pfn = page_to_pfn(dpage);

	/*
	 * This function is used in two cases:
	 * - When HV touches a secure page, for which we do UV_PAGE_OUT
	 * - When a secure page is converted to a shared page, we *get*
	 *   the page to essentially unmap the device page. In this
	 *   case we skip page-out.
	 */
	if (!pvt->skip_page_out)
		ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
				  gpa, 0, page_shift);

	if (ret == U_SUCCESS)
		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
	else {
		unlock_page(dpage);
		__free_page(dpage);
		goto out_finalize;
	}

	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
out:
	mutex_unlock(&kvm->arch.uvmem_lock);
	return ret;
}

/*
 * Fault handler callback that gets called when HV touches any page that
 * has been moved to secure memory. We ask UV to give back the page by
 * issuing a UV_PAGE_OUT uvcall.
 *
 * This eventually results in the device PFN being dropped, and the newly
 * provisioned page/PFN gets populated in QEMU page tables.
 */
static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;

	if (kvmppc_svm_page_out(vmf->vma, vmf->address,
				vmf->address + PAGE_SIZE, PAGE_SHIFT,
				pvt->kvm, pvt->gpa))
		return VM_FAULT_SIGBUS;
	else
		return 0;
}

/*
 * Release the device PFN back to the pool
 *
 * Gets called when a secure page becomes a normal page during H_SVM_PAGE_OUT.
 * Gets called with kvm->arch.uvmem_lock held.
 */
static void kvmppc_uvmem_page_free(struct page *page)
{
	unsigned long pfn = page_to_pfn(page) -
			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
	struct kvmppc_uvmem_page_pvt *pvt;

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = page->zone_device_data;
	page->zone_device_data = NULL;
	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
	kfree(pvt);
}

static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
	.page_free = kvmppc_uvmem_page_free,
	.migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
};
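
/*
 * Lifecycle of a device (secure) page, tied together by the ops above
 * (an illustrative recap): kvmppc_uvmem_get_page() allocates a device PFN
 * and records the GPA it stands for in page->zone_device_data. A later HV
 * access to that GPA faults into .migrate_to_ram, which pages the contents
 * back out via kvmppc_svm_page_out(). When the device page is finally
 * freed, .page_free returns the PFN to the bitmap pool and drops the
 * gfn-to-device-PFN mapping.
 */
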
/*
 * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
 */
unsigned long
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
		      unsigned long flags, unsigned long page_shift)
{
	unsigned long gfn = gpa >> page_shift;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags)
		return H_P2;

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_read(&kvm->mm->mmap_sem);
	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out;

	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
		ret = H_SUCCESS;
out:
	up_read(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
{
	unsigned long pfn;
	int ret = U_SUCCESS;

	pfn = gfn_to_pfn(kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
		goto out;

	ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
			 0, PAGE_SHIFT);
out:
	kvm_release_pfn_clean(pfn);
	mutex_unlock(&kvm->arch.uvmem_lock);
	return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
}

static u64 kvmppc_get_secmem_size(void)
{
	struct device_node *np;
	int i, len;
	const __be32 *prop;
	u64 size = 0;

	np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
	if (!np)
		goto out;

	prop = of_get_property(np, "secure-memory-ranges", &len);
	if (!prop)
		goto out_put;

	for (i = 0; i < len / (sizeof(*prop) * 4); i++)
		size += of_read_number(prop + (i * 4) + 2, 2);

out_put:
	of_node_put(np);
out:
	return size;
}
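
/*
 * "secure-memory-ranges" is parsed above as a list of (address, size)
 * pairs, each value being two 32-bit cells wide, so one range occupies
 * four cells and only the size half is summed. An illustrative (made up)
 * device tree node could look like:
 *
 *	uv-firmware {
 *		compatible = "ibm,uv-firmware";
 *		secure-memory-ranges = <0x0 0x80000000  0x0 0x40000000>;
 *	};
 *
 * which would report 1GB of secure memory starting at 2GB.
 */
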
int kvmppc_uvmem_init(void)
{
	int ret = 0;
	unsigned long size;
	struct resource *res;
	void *addr;
	unsigned long pfn_last, pfn_first;

	size = kvmppc_get_secmem_size();
	if (!size) {
		/*
		 * Don't fail the initialization of the kvm-hv module if
		 * the platform doesn't export the ibm,uv-firmware node.
		 * Let normal guests run on such a PEF-disabled platform.
		 */
		pr_info("KVMPPC-UVMEM: No support for secure guests\n");
		goto out;
	}

	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out;
	}

	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
	kvmppc_uvmem_pgmap.res = *res;
	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto out_free_region;
	}

	pfn_first = res->start >> PAGE_SHIFT;
	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
				      sizeof(unsigned long), GFP_KERNEL);
	if (!kvmppc_uvmem_bitmap) {
		ret = -ENOMEM;
		goto out_unmap;
	}

	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
	return ret;
out_unmap:
	memunmap_pages(&kvmppc_uvmem_pgmap);
out_free_region:
	release_mem_region(res->start, size);
out:
	return ret;
}

void kvmppc_uvmem_free(void)
{
	memunmap_pages(&kvmppc_uvmem_pgmap);
	release_mem_region(kvmppc_uvmem_pgmap.res.start,
			   resource_size(&kvmppc_uvmem_pgmap.res));
	kfree(kvmppc_uvmem_bitmap);
}