// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>

#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>

#include "mmu.h"
#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
#include "cpuid.h"
#include "trace.h"

#ifndef CONFIG_KVM_AMD_SEV
/*
 * When this config is not defined, SEV feature is not supported and APIs in
 * this file are not used but this file still gets compiled into the KVM AMD
 * module.
 *
 * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
 * misc_res_type {} defined in linux/misc_cgroup.h.
 *
 * Below macros allow compilation to succeed.
 */
#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif

#ifdef CONFIG_KVM_AMD_SEV
/* enable/disable SEV support */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);

/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

static u8 sev_enc_bit;
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
	int ret, asid, error = 0;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
	if (asid > max_asid)
		return -EBUSY;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);

	return ret;
}

static inline bool is_mirroring_enc_context(struct kvm *kvm)
{
	return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
}

/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(int min_asid, int max_asid)
{
	if (sev_flush_asids(min_asid, max_asid))
		return false;

	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   nr_asids);
	bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);

	return true;
}

static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
{
	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	return misc_cg_try_charge(type, sev->misc_cg, 1);
}

static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
{
	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	misc_cg_uncharge(type, sev->misc_cg, 1);
}

static int sev_asid_new(struct kvm_sev_info *sev)
{
	int asid, min_asid, max_asid, ret;
	bool retry = true;

	WARN_ON(sev->misc_cg);
	sev->misc_cg = get_current_misc_cg();
	ret = sev_misc_cg_try_charge(sev);
	if (ret) {
		put_misc_cg(sev->misc_cg);
		sev->misc_cg = NULL;
		return ret;
	}

	mutex_lock(&sev_bitmap_lock);

	/*
	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
	 * SEV-ES-enabled guests can use from 1 to min_sev_asid - 1.
	 */
	min_asid = sev->es_active ? 1 : min_sev_asid;
	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
	if (asid > max_asid) {
		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
			retry = false;
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		ret = -EBUSY;
		goto e_uncharge;
	}

	__set_bit(asid, sev_asid_bitmap);

	mutex_unlock(&sev_bitmap_lock);

	return asid;
e_uncharge:
	sev_misc_cg_uncharge(sev);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
	return ret;
}

static int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

static void sev_asid_free(struct kvm_sev_info *sev)
{
	struct svm_cpu_data *sd;
	int cpu;

	mutex_lock(&sev_bitmap_lock);

	__set_bit(sev->asid, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
		sd = per_cpu_ptr(&svm_data, cpu);
		sd->sev_vmcbs[sev->asid] = NULL;
	}

	mutex_unlock(&sev_bitmap_lock);

	sev_misc_cg_uncharge(sev);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
}

static void sev_decommission(unsigned int handle)
{
	struct sev_data_decommission decommission;

	if (!handle)
		return;

	decommission.handle = handle;
	sev_guest_decommission(&decommission, NULL);
}

static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
	struct sev_data_deactivate deactivate;

	if (!handle)
		return;

	deactivate.handle = handle;

	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(&deactivate, NULL);
	up_read(&sev_deactivate_lock);

	sev_decommission(handle);
}
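
/*
 * sev_guest_init() backs both KVM_SEV_INIT and KVM_SEV_ES_INIT: it marks the
 * VM as SEV-active, allocates an ASID from the appropriate range and
 * initializes the PSP firmware context. It must run before any vCPU has been
 * created.
 */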
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	int asid, ret;

	if (kvm->created_vcpus)
		return -EINVAL;

	ret = -EBUSY;
	if (unlikely(sev->active))
		return ret;

	sev->active = true;
	sev->es_active = argp->id == KVM_SEV_ES_INIT;
	asid = sev_asid_new(sev);
	if (asid < 0)
		goto e_no_asid;
	sev->asid = asid;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	INIT_LIST_HEAD(&sev->regions_list);
	INIT_LIST_HEAD(&sev->mirror_vms);

	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);

	return 0;

e_free:
	sev_asid_free(sev);
	sev->asid = 0;
e_no_asid:
	sev->es_active = false;
	sev->active = false;
	return ret;
}

static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
	struct sev_data_activate activate;
	int asid = sev_get_asid(kvm);
	int ret;

	/* activate ASID on the given handle */
	activate.handle = handle;
	activate.asid = asid;
	ret = sev_guest_activate(&activate, error);

	return ret;
}

static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
	return ret;
}

static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return __sev_issue_cmd(sev->fd, id, data, error);
}

static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_start start;
	struct kvm_sev_launch_start params;
	void *dh_blob, *session_blob;
	int *error = &argp->error;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	memset(&start, 0, sizeof(start));

	dh_blob = NULL;
	if (params.dh_uaddr) {
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob))
			return PTR_ERR(dh_blob);

		start.dh_cert_address = __sme_set(__pa(dh_blob));
		start.dh_cert_len = params.dh_len;
	}

	session_blob = NULL;
	if (params.session_uaddr) {
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
			goto e_free_dh;
		}

		start.session_address = __sme_set(__pa(session_blob));
		start.session_len = params.session_len;
	}

	start.handle = params.handle;
	start.policy = params.policy;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
		goto e_free_session;
	}

	/* return handle to userspace */
	params.handle = start.handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start.handle);
		ret = -EFAULT;
		goto e_free_session;
	}

	sev->handle = start.handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
	return ret;
}

static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
				    unsigned long ulen, unsigned long *n,
				    int write)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	unsigned long npages, size;
	int npinned;
	unsigned long locked, lock_limit;
	struct page **pages;
	unsigned long first, last;
	int ret;

	lockdep_assert_held(&kvm->lock);

	if (ulen == 0 || uaddr + ulen < uaddr)
		return ERR_PTR(-EINVAL);

	/* Calculate number of pages. */
	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
	npages = (last - first + 1);

	locked = sev->pages_locked + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
		return ERR_PTR(-ENOMEM);
	}

	if (WARN_ON_ONCE(npages > INT_MAX))
		return ERR_PTR(-EINVAL);

	/* Avoid using vmalloc for smaller buffers. */
	size = npages * sizeof(struct page *);
	if (size > PAGE_SIZE)
		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user virtual address. */
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
		ret = -ENOMEM;
		goto err;
	}

	*n = npages;
	sev->pages_locked = locked;

	return pages;

err:
	if (npinned > 0)
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
}

static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
			     unsigned long npages)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	unpin_user_pages(pages, npages);
	kvfree(pages);
	sev->pages_locked -= npages;
}

static void sev_clflush_pages(struct page *pages[], unsigned long npages)
{
	uint8_t *page_virtual;
	unsigned long i;

	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
	    pages == NULL)
		return;

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
		cond_resched();
	}
}

static unsigned long get_num_contig_pages(unsigned long idx,
					  struct page **inpages, unsigned long npages)
{
	unsigned long paddr, next_paddr;
	unsigned long i = idx + 1, pages = 1;

	/* find the number of contiguous pages starting from idx */
	paddr = __sme_page_pa(inpages[idx]);
	while (i < npages) {
		next_paddr = __sme_page_pa(inpages[i++]);
		if ((paddr + PAGE_SIZE) == next_paddr) {
			pages++;
			paddr = next_paddr;
			continue;
		}
		break;
	}

	return pages;
}

static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_launch_update_data params;
	struct sev_data_launch_update_data data;
	struct page **inpages;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	vaddr = params.uaddr;
	size = params.len;
	vaddr_end = vaddr + size;

	/* Lock the user memory. */
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages))
		return PTR_ERR(inpages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(inpages, npages);

	data.reserved = 0;
	data.handle = sev->handle;

	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
		int offset, len;

		/*
		 * If the user buffer is not page-aligned, calculate the offset
		 * within the page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Calculate the number of pages that can be encrypted in one go. */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data.len = len;
		data.address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
	return ret;
}

static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
	struct sev_es_save_area *save = svm->sev_es.vmsa;

	/* Check some debug related fields before encrypting the VMSA */
	if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
		return -EINVAL;

	/*
	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
	 * the traditional VMSA that is part of the VMCB. Copy the
	 * traditional VMSA as it has been built so far (in prep
	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
	 */
	memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));

	/* Sync registers */
	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
#ifdef CONFIG_X86_64
	save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
	save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
#endif
	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];

	/* Sync some non-GPR registers before encrypting */
	save->xcr0 = svm->vcpu.arch.xcr0;
	save->pkru = svm->vcpu.arch.pkru;
	save->xss  = svm->vcpu.arch.ia32_xss;
	save->dr6  = svm->vcpu.arch.dr6;

	pr_debug("Virtual Machine Save Area (VMSA):\n");
	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);

	return 0;
}

static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
				    int *error)
{
	struct sev_data_launch_update_vmsa vmsa;
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	/* Perform some pre-encryption checks against the VMSA */
	ret = sev_es_sync_vmsa(svm);
	if (ret)
		return ret;

	/*
	 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
	 * the VMSA memory content (i.e. it will write the same memory region
	 * with the guest's key), so invalidate it first.
	 */
	clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);

	vmsa.reserved = 0;
	vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
	vmsa.address = __sme_pa(svm->sev_es.vmsa);
	vmsa.len = PAGE_SIZE;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
	if (ret)
		return ret;

	vcpu->arch.guest_state_protected = true;
	return 0;
}

static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	int ret;

	if (!sev_es_guest(kvm))
		return -ENOTTY;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		ret = mutex_lock_killable(&vcpu->mutex);
		if (ret)
			return ret;

		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);

		mutex_unlock(&vcpu->mutex);
		if (ret)
			return ret;
	}

	return 0;
}

static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *measure = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_measure data;
	struct kvm_sev_launch_measure params;
	void __user *p = NULL;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, measure, sizeof(params)))
		return -EFAULT;

	memset(&data, 0, sizeof(data));

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE)
			return -EINVAL;

		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
		if (!blob)
			return -ENOMEM;

		data.address = __psp_pa(blob);
		data.len = params.len;
	}

cmd:
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);

	/*
	 * If we queried the session length, the FW responded with the
	 * expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data.len;
	if (copy_to_user(measure, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
	return ret;
}

static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
}

static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_guest_status params;
	struct sev_data_guest_status data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	memset(&data, 0, sizeof(data));

	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
	if (ret)
		return ret;

	params.policy = data.policy;
	params.state = data.state;
	params.handle = data.handle;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
		ret = -EFAULT;

	return ret;
}

static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg data;

	data.reserved = 0;
	data.handle = sev->handle;
	data.dst_addr = dst;
	data.src_addr = src;
	data.len = size;

	return sev_issue_cmd(kvm,
			     enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			     &data, error);
}

static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
			     unsigned long dst_paddr, int sz, int *err)
{
	int offset;

	/*
	 * It's safe to read more than we are asked; the caller should ensure
	 * that the destination has enough space.
	 */
	offset = src_paddr & 15;
	src_paddr = round_down(src_paddr, 16);
	sz = round_up(sz + offset, 16);

	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
}

static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  void __user *dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* if inputs are not 16-byte aligned then use an intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr, 16) ||
	    !IS_ALIGNED(size, 16)) {
		tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}

static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
				  void __user *vaddr,
				  unsigned long dst_paddr,
				  void __user *dst_vaddr,
				  int size, int *error)
{
	struct page *src_tpage = NULL;
	struct page *dst_tpage = NULL;
	int ret, len = size;

	/* If source buffer is not aligned then use an intermediate buffer */
	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
		src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
		if (!src_tpage)
			return -ENOMEM;

		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
			__free_page(src_tpage);
			return -EFAULT;
		}

		paddr = __sme_page_pa(src_tpage);
	}

	/*
	 * If destination buffer or length is not aligned then do read-modify-write:
	 * - decrypt destination in an intermediate buffer
	 * - copy the source buffer in an intermediate buffer
	 * - use the intermediate buffer as source buffer
	 */
	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
		int dst_offset;

		dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
		if (!dst_tpage) {
			ret = -ENOMEM;
			goto e_free;
		}

		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
		if (ret)
			goto e_free;

		/*
		 * If the source is a kernel buffer then use memcpy(),
		 * otherwise copy_from_user().
		 */
		dst_offset = dst_paddr & 15;

		if (src_tpage)
			memcpy(page_address(dst_tpage) + dst_offset,
			       page_address(src_tpage), size);
		else {
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   vaddr, size)) {
				ret = -EFAULT;
				goto e_free;
			}
		}

		paddr = __sme_page_pa(dst_tpage);
		dst_paddr = round_down(dst_paddr, 16);
		len = round_up(size, 16);
	}

	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);

e_free:
	if (src_tpage)
		__free_page(src_tpage);
	if (dst_tpage)
		__free_page(dst_tpage);
	return ret;
}

static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     (void __user *)dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region, so verify
	 * that the userspace memory pages are contiguous before issuing the
	 * command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	memset(&data, 0, sizeof(data));

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data.guest_address = __sme_page_pa(pages[0]) + offset;
	data.guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_unpin_memory;
	}

	data.trans_address = __psp_pa(blob);
	data.trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;

	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_unpin_memory:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < n; i++) {
		set_page_dirty_lock(pages[i]);
		mark_page_accessed(pages[i]);
	}
	sev_unpin_memory(kvm, pages, n);
	return ret;
}

static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *report = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_attestation_report data;
	struct kvm_sev_attestation_report params;
	void __user *p;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	memset(&data, 0, sizeof(data));

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE)
			return -EINVAL;

		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
		if (!blob)
			return -ENOMEM;

		data.address = __psp_pa(blob);
		data.len = params.len;
		memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
	}
cmd:
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
	/*
	 * If we queried the session length, the FW responded with the
	 * expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data.len;
	if (copy_to_user(report, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
	return ret;
}

/* Userspace wants to query session length. */
static int
__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
				      struct kvm_sev_send_start *params)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_start data;
	int ret;

	memset(&data, 0, sizeof(data));
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	params->session_len = data.session_len;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
			 sizeof(struct kvm_sev_send_start)))
		ret = -EFAULT;

	return ret;
}

static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_start data;
	struct kvm_sev_send_start params;
	void *amd_certs, *session_data;
	void *pdh_cert, *plat_certs;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_send_start)))
		return -EFAULT;

	/* if session_len is zero, userspace wants to query the session length */
	if (!params.session_len)
		return __sev_send_start_query_session_length(kvm, argp,
							     &params);

	/* some sanity checks */
	if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
	    !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
		return -EINVAL;

	/* allocate the memory to hold the session data blob */
	session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
	if (!session_data)
		return -ENOMEM;

	/* copy the certificate blobs from userspace */
	pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
				      params.pdh_cert_len);
	if (IS_ERR(pdh_cert)) {
		ret = PTR_ERR(pdh_cert);
		goto e_free_session;
	}

	plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
					params.plat_certs_len);
	if (IS_ERR(plat_certs)) {
		ret = PTR_ERR(plat_certs);
		goto e_free_pdh;
	}

	amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
				       params.amd_certs_len);
	if (IS_ERR(amd_certs)) {
		ret = PTR_ERR(amd_certs);
		goto e_free_plat_cert;
	}

	/* populate the FW SEND_START field with system physical address */
	memset(&data, 0, sizeof(data));
	data.pdh_cert_address = __psp_pa(pdh_cert);
	data.pdh_cert_len = params.pdh_cert_len;
	data.plat_certs_address = __psp_pa(plat_certs);
	data.plat_certs_len = params.plat_certs_len;
	data.amd_certs_address = __psp_pa(amd_certs);
	data.amd_certs_len = params.amd_certs_len;
	data.session_address = __psp_pa(session_data);
	data.session_len = params.session_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
				 session_data, params.session_len)) {
		ret = -EFAULT;
		goto e_free_amd_cert;
	}

	params.policy = data.policy;
	params.session_len = data.session_len;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
			 sizeof(struct kvm_sev_send_start)))
		ret = -EFAULT;

e_free_amd_cert:
	kfree(amd_certs);
e_free_plat_cert:
	kfree(plat_certs);
e_free_pdh:
	kfree(pdh_cert);
e_free_session:
	kfree(session_data);
	return ret;
}

/* Userspace wants to query either header or trans length. */
static int
__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
				     struct kvm_sev_send_update_data *params)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_update_data data;
	int ret;

	memset(&data, 0, sizeof(data));
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);

	params->hdr_len = data.hdr_len;
	params->trans_len = data.trans_len;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
			 sizeof(struct kvm_sev_send_update_data)))
		ret = -EFAULT;

	return ret;
}

static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_update_data data;
	struct kvm_sev_send_update_data params;
	void *hdr, *trans_data;
	struct page **guest_page;
	unsigned long n;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_send_update_data)))
		return -EFAULT;

	/* userspace wants to query either header or trans length */
	if (!params.trans_len || !params.hdr_len)
		return __sev_send_update_data_query_lengths(kvm, argp, &params);

	if (!params.trans_uaddr || !params.guest_uaddr ||
	    !params.guest_len || !params.hdr_uaddr)
		return -EINVAL;

	/* Check if we are crossing the page boundary */
	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	if ((params.guest_len + offset > PAGE_SIZE))
		return -EINVAL;

	/* Pin guest memory */
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 0);
	if (IS_ERR(guest_page))
		return PTR_ERR(guest_page);

	/* allocate memory for header and transport buffer */
	ret = -ENOMEM;
	hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
	if (!hdr)
		goto e_unpin;

	trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
	if (!trans_data)
		goto e_free_hdr;

	memset(&data, 0, sizeof(data));
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;
	data.trans_address = __psp_pa(trans_data);
	data.trans_len = params.trans_len;

	/* The SEND_UPDATE_DATA command requires C-bit to be always set. */
	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
	data.guest_address |= sev_me_mask;
	data.guest_len = params.guest_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);

	if (ret)
		goto e_free_trans_data;

	/* copy transport buffer to user space */
	if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
			 trans_data, params.trans_len)) {
		ret = -EFAULT;
		goto e_free_trans_data;
	}

	/* Copy packet header to userspace. */
	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
			 params.hdr_len))
		ret = -EFAULT;

e_free_trans_data:
	kfree(trans_data);
e_free_hdr:
	kfree(hdr);
e_unpin:
	sev_unpin_memory(kvm, guest_page, n);

	return ret;
}

static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
}

static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_cancel data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
}

static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_receive_start start;
	struct kvm_sev_receive_start params;
	int *error = &argp->error;
	void *session_data;
	void *pdh_data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	/* Get parameter from the userspace */
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_receive_start)))
		return -EFAULT;

	/* some sanity checks */
	if (!params.pdh_uaddr || !params.pdh_len ||
	    !params.session_uaddr || !params.session_len)
		return -EINVAL;

	pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
	if (IS_ERR(pdh_data))
		return PTR_ERR(pdh_data);

	session_data = psp_copy_user_blob(params.session_uaddr,
					  params.session_len);
	if (IS_ERR(session_data)) {
		ret = PTR_ERR(session_data);
		goto e_free_pdh;
	}

	memset(&start, 0, sizeof(start));
	start.handle = params.handle;
	start.policy = params.policy;
	start.pdh_cert_address = __psp_pa(pdh_data);
	start.pdh_cert_len = params.pdh_len;
	start.session_address = __psp_pa(session_data);
	start.session_len = params.session_len;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
			      error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
		goto e_free_session;
	}

	params.handle = start.handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data,
			 &params, sizeof(struct kvm_sev_receive_start))) {
		ret = -EFAULT;
		sev_unbind_asid(kvm, start.handle);
		goto e_free_session;
	}

	sev->handle = start.handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_data);
e_free_pdh:
	kfree(pdh_data);

	return ret;
}

static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_receive_update_data params;
	struct sev_data_receive_update_data data;
	void *hdr = NULL, *trans = NULL;
	struct page **guest_page;
	unsigned long n;
	int ret, offset;

	if (!sev_guest(kvm))
		return -EINVAL;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			   sizeof(struct kvm_sev_receive_update_data)))
		return -EFAULT;

	if (!params.hdr_uaddr || !params.hdr_len ||
	    !params.guest_uaddr || !params.guest_len ||
	    !params.trans_uaddr || !params.trans_len)
		return -EINVAL;

	/* Check if we are crossing the page boundary */
	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	if ((params.guest_len + offset > PAGE_SIZE))
		return -EINVAL;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr))
		return PTR_ERR(hdr);

	trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto e_free_hdr;
	}

	memset(&data, 0, sizeof(data));
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;
	data.trans_address = __psp_pa(trans);
	data.trans_len = params.trans_len;

	/* Pin guest memory */
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 1);
	if (IS_ERR(guest_page)) {
		ret = PTR_ERR(guest_page);
		goto e_free_trans;
	}

	/*
	 * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
	 * encrypts the written data with the guest's key, and the cache may
	 * contain dirty, unencrypted data.
	 */
	sev_clflush_pages(guest_page, n);

	/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
	data.guest_address |= sev_me_mask;
	data.guest_len = params.guest_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
			    &argp->error);

	sev_unpin_memory(kvm, guest_page, n);

e_free_trans:
	kfree(trans);
e_free_hdr:
	kfree(hdr);

	return ret;
}

static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_receive_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}

static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
	/*
	 * Allow mirror VMs to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
	 * on active mirror VMs. Also allow the debugging and status commands.
	 */
	if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
	    cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
	    cmd_id == KVM_SEV_DBG_ENCRYPT)
		return true;

	return false;
}

static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
	int r = -EBUSY;

	if (dst_kvm == src_kvm)
		return -EINVAL;

	/*
	 * Bail if these VMs are already involved in a migration to avoid
	 * deadlock between two VMs trying to migrate to/from each other.
	 */
	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
		return -EBUSY;

	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
		goto release_dst;

	r = -EINTR;
	if (mutex_lock_killable(&dst_kvm->lock))
		goto release_src;
	if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
		goto unlock_dst;
	return 0;

unlock_dst:
	mutex_unlock(&dst_kvm->lock);
release_src:
	atomic_set_release(&src_sev->migration_in_progress, 0);
release_dst:
	atomic_set_release(&dst_sev->migration_in_progress, 0);
	return r;
}

static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;

	mutex_unlock(&dst_kvm->lock);
	mutex_unlock(&src_kvm->lock);
	atomic_set_release(&dst_sev->migration_in_progress, 0);
	atomic_set_release(&src_sev->migration_in_progress, 0);
}

/* vCPU mutex subclasses. */
enum sev_migration_role {
	SEV_MIGRATION_SOURCE = 0,
	SEV_MIGRATION_TARGET,
	SEV_NR_MIGRATION_ROLES,
};
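
/*
 * Lock every vCPU of a VM for intra-host migration. The source and
 * destination VMs use different lockdep subclasses (the roles above) so that
 * holding both sets of vCPU mutexes does not trip lockdep's nesting checks.
 */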
static int sev_lock_vcpus_for_migration(struct kvm *kvm,
					enum sev_migration_role role)
{
	struct kvm_vcpu *vcpu;
	unsigned long i, j;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;

#ifdef CONFIG_PROVE_LOCKING
		if (!i)
			/*
			 * Reset the role to one that avoids colliding with
			 * the role used for the first vcpu mutex.
			 */
			role = SEV_NR_MIGRATION_ROLES;
		else
			mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
#endif
	}

	return 0;

out_unlock:

	kvm_for_each_vcpu(j, vcpu, kvm) {
		if (i == j)
			break;

#ifdef CONFIG_PROVE_LOCKING
		if (j)
			mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
#endif

		mutex_unlock(&vcpu->mutex);
	}
	return -EINTR;
}

static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	bool first = true;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (first)
			first = false;
		else
			mutex_acquire(&vcpu->mutex.dep_map,
				      SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);

		mutex_unlock(&vcpu->mutex);
	}
}

static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
	struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
	struct kvm_vcpu *dst_vcpu, *src_vcpu;
	struct vcpu_svm *dst_svm, *src_svm;
	struct kvm_sev_info *mirror;
	unsigned long i;

	dst->active = true;
	dst->asid = src->asid;
	dst->handle = src->handle;
	dst->pages_locked = src->pages_locked;
	dst->enc_context_owner = src->enc_context_owner;
	dst->es_active = src->es_active;

	src->asid = 0;
	src->active = false;
	src->handle = 0;
	src->pages_locked = 0;
	src->enc_context_owner = NULL;
	src->es_active = false;

	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);

	/*
	 * If this VM has mirrors, "transfer" each mirror's refcount of the
	 * source to the destination (this KVM). The caller holds a reference
	 * to the source, so there's no danger of use-after-free.
	 */
	list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
	list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) {
		kvm_get_kvm(dst_kvm);
		kvm_put_kvm(src_kvm);
		mirror->enc_context_owner = dst_kvm;
	}

	/*
	 * If this VM is a mirror, remove the old mirror from the owners list
	 * and add the new mirror to the list.
	 */
	if (is_mirroring_enc_context(dst_kvm)) {
		struct kvm_sev_info *owner_sev_info =
			&to_kvm_svm(dst->enc_context_owner)->sev_info;

		list_del(&src->mirror_entry);
		list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
	}

	kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
		dst_svm = to_svm(dst_vcpu);

		sev_init_vmcb(dst_svm);

		if (!dst->es_active)
			continue;

		/*
		 * Note, the source is not required to have the same number of
		 * vCPUs as the destination when migrating a vanilla SEV VM.
		 */
		src_vcpu = kvm_get_vcpu(dst_kvm, i);
		src_svm = to_svm(src_vcpu);

		/*
		 * Transfer VMSA and GHCB state to the destination. Nullify and
		 * clear source fields as appropriate, the state now belongs to
		 * the destination.
		 */
		memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
		dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
		dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
		dst_vcpu->arch.guest_state_protected = true;

		memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
		src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
		src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
		src_vcpu->arch.guest_state_protected = false;
	}
}

static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
{
	struct kvm_vcpu *src_vcpu;
	unsigned long i;

	if (!sev_es_guest(src))
		return 0;

	if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
		return -EINVAL;

	kvm_for_each_vcpu(i, src_vcpu, src) {
		if (!src_vcpu->arch.guest_state_protected)
			return -EINVAL;
	}

	return 0;
}

int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_info *src_sev, *cg_cleanup_sev;
	struct file *source_kvm_file;
	struct kvm *source_kvm;
	bool charged = false;
	int ret;

	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;
		goto out_fput;
	}

	source_kvm = source_kvm_file->private_data;
	ret = sev_lock_two_vms(kvm, source_kvm);
	if (ret)
		goto out_fput;

	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	src_sev = &to_kvm_svm(source_kvm)->sev_info;

	dst_sev->misc_cg = get_current_misc_cg();
	cg_cleanup_sev = dst_sev;
	if (dst_sev->misc_cg != src_sev->misc_cg) {
		ret = sev_misc_cg_try_charge(dst_sev);
		if (ret)
			goto out_dst_cgroup;
		charged = true;
	}

	ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
	if (ret)
		goto out_dst_cgroup;
	ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
	if (ret)
		goto out_dst_vcpu;

	ret = sev_check_source_vcpus(kvm, source_kvm);
	if (ret)
		goto out_source_vcpu;

	sev_migrate_from(kvm, source_kvm);
	kvm_vm_dead(source_kvm);
	cg_cleanup_sev = src_sev;
	ret = 0;

out_source_vcpu:
	sev_unlock_vcpus_for_migration(source_kvm);
out_dst_vcpu:
	sev_unlock_vcpus_for_migration(kvm);
out_dst_cgroup:
	/* Operates on the source on success, on the destination on failure. */
	if (charged)
		sev_misc_cg_uncharge(cg_cleanup_sev);
	put_misc_cg(cg_cleanup_sev->misc_cg);
	cg_cleanup_sev->misc_cg = NULL;
out_unlock:
	sev_unlock_two_vms(kvm, source_kvm);
out_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
	return ret;
}

int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!sev_enabled)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	/* Only the enc_context_owner handles some memory enc operations. */
	if (is_mirroring_enc_context(kvm) &&
	    !is_cmd_allowed_from_mirror(sev_cmd.id)) {
		r = -EINVAL;
		goto out;
	}

	switch (sev_cmd.id) {
	case KVM_SEV_ES_INIT:
		if (!sev_es_enabled) {
			r = -ENOTTY;
			goto out;
		}
		fallthrough;
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
		r = sev_launch_update_vmsa(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	case KVM_SEV_GET_ATTESTATION_REPORT:
		r = sev_get_attestation_report(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_START:
		r = sev_send_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_UPDATE_DATA:
		r = sev_send_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_FINISH:
		r = sev_send_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_CANCEL:
		r = sev_send_cancel(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_START:
		r = sev_receive_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_UPDATE_DATA:
		r = sev_receive_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_FINISH:
		r = sev_receive_finish(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

int sev_mem_enc_register_region(struct kvm *kvm,
				struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	/* If kvm is mirroring encryption context it isn't responsible for it */
	if (is_mirroring_enc_context(kvm))
		return -EINVAL;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	if (!region)
		return -ENOMEM;

	mutex_lock(&kvm->lock);
	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (IS_ERR(region->pages)) {
		ret = PTR_ERR(region->pages);
		mutex_unlock(&kvm->lock);
		goto e_free;
	}

	region->uaddr = range->addr;
	region->size = range->size;

	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range. Let's make sure caches are
	 * flushed to ensure that guest data gets written into memory with the
	 * correct C-bit.
	 */
	sev_clflush_pages(region->pages, region->npages);

	return ret;

e_free:
	kfree(region);
	return ret;
}

static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}

static void __unregister_enc_region_locked(struct kvm *kvm,
					   struct enc_region *region)
{
	sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
	kfree(region);
}

int sev_mem_enc_unregister_region(struct kvm *kvm,
				  struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	/* If kvm is mirroring encryption context it isn't responsible for it */
	if (is_mirroring_enc_context(kvm))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}

int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
	struct file *source_kvm_file;
	struct kvm *source_kvm;
	struct kvm_sev_info *source_sev, *mirror_sev;
	int ret;

	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;
		goto e_source_fput;
	}

	source_kvm = source_kvm_file->private_data;
	ret = sev_lock_two_vms(kvm, source_kvm);
	if (ret)
		goto e_source_fput;

	/*
	 * Mirrors of mirrors should work, but let's not get silly. Also
	 * disallow out-of-band SEV/SEV-ES init if the target is already an
	 * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
	 */
	if (sev_guest(kvm) || !sev_guest(source_kvm) ||
	    is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
		ret = -EINVAL;
		goto e_unlock;
	}

	/*
	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
	 */
	source_sev = &to_kvm_svm(source_kvm)->sev_info;
	kvm_get_kvm(source_kvm);
	mirror_sev = &to_kvm_svm(kvm)->sev_info;
	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);

	/* Set enc_context_owner and copy its encryption context over */
	mirror_sev->enc_context_owner = source_kvm;
	mirror_sev->active = true;
	mirror_sev->asid = source_sev->asid;
	mirror_sev->fd = source_sev->fd;
	mirror_sev->es_active = source_sev->es_active;
	mirror_sev->handle = source_sev->handle;
	INIT_LIST_HEAD(&mirror_sev->regions_list);
	INIT_LIST_HEAD(&mirror_sev->mirror_vms);
	ret = 0;
	 * The mirror does not share the same KVM context as the original,
	 * and the two may have different memory views.
	 */

e_unlock:
	sev_unlock_two_vms(kvm, source_kvm);
e_source_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
	return ret;
}

void sev_vm_destroy(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

	if (!sev_guest(kvm))
		return;

	WARN_ON(!list_empty(&sev->mirror_vms));

	/* If this is a mirror_kvm, release the enc_context_owner and skip sev cleanup */
	if (is_mirroring_enc_context(kvm)) {
		struct kvm *owner_kvm = sev->enc_context_owner;

		mutex_lock(&owner_kvm->lock);
		list_del(&sev->mirror_entry);
		mutex_unlock(&owner_kvm->lock);
		kvm_put_kvm(owner_kvm);
		return;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * If userspace was terminated before unregistering the memory regions
	 * then let's unpin all the registered memory.
	 */
	if (!list_empty(head)) {
		list_for_each_safe(pos, q, head) {
			__unregister_enc_region_locked(kvm,
				list_entry(pos, struct enc_region, list));
			cond_resched();
		}
	}

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev);
}

void __init sev_set_cpu_caps(void)
{
	if (!sev_enabled)
		kvm_cpu_cap_clear(X86_FEATURE_SEV);
	if (!sev_es_enabled)
		kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
}

void __init sev_hardware_setup(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
	bool sev_es_supported = false;
	bool sev_supported = false;

	if (!sev_enabled || !npt_enabled)
		goto out;

	/*
	 * SEV must obviously be supported in hardware. Sanity check that the
	 * CPU supports decode assists, which is mandatory for SEV guests to
	 * support instruction emulation.
	 */
	if (!boot_cpu_has(X86_FEATURE_SEV) ||
	    WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;
	if (!max_sev_asid)
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;
	sev_me_mask = 1UL << (ebx & 0x3f);

	/*
	 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
	 * even though it's never used, so that the bitmap is indexed by the
	 * actual ASID.
2201 */ 2202 nr_asids = max_sev_asid + 1; 2203 sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2204 if (!sev_asid_bitmap) 2205 goto out; 2206 2207 sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2208 if (!sev_reclaim_asid_bitmap) { 2209 bitmap_free(sev_asid_bitmap); 2210 sev_asid_bitmap = NULL; 2211 goto out; 2212 } 2213 2214 sev_asid_count = max_sev_asid - min_sev_asid + 1; 2215 if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) 2216 goto out; 2217 2218 pr_info("SEV supported: %u ASIDs\n", sev_asid_count); 2219 sev_supported = true; 2220 2221 /* SEV-ES support requested? */ 2222 if (!sev_es_enabled) 2223 goto out; 2224 2225 /* 2226 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest 2227 * instruction stream, i.e. can't emulate in response to a #NPF and 2228 * instead relies on #NPF(RSVD) being reflected into the guest as #VC 2229 * (the guest can then do a #VMGEXIT to request MMIO emulation). 2230 */ 2231 if (!enable_mmio_caching) 2232 goto out; 2233 2234 /* Does the CPU support SEV-ES? */ 2235 if (!boot_cpu_has(X86_FEATURE_SEV_ES)) 2236 goto out; 2237 2238 /* Has the system been allocated ASIDs for SEV-ES? */ 2239 if (min_sev_asid == 1) 2240 goto out; 2241 2242 sev_es_asid_count = min_sev_asid - 1; 2243 if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) 2244 goto out; 2245 2246 pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count); 2247 sev_es_supported = true; 2248 2249 out: 2250 sev_enabled = sev_supported; 2251 sev_es_enabled = sev_es_supported; 2252 #endif 2253 } 2254 2255 void sev_hardware_unsetup(void) 2256 { 2257 if (!sev_enabled) 2258 return; 2259 2260 /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ 2261 sev_flush_asids(1, max_sev_asid); 2262 2263 bitmap_free(sev_asid_bitmap); 2264 bitmap_free(sev_reclaim_asid_bitmap); 2265 2266 misc_cg_set_capacity(MISC_CG_RES_SEV, 0); 2267 misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0); 2268 } 2269 2270 int sev_cpu_init(struct svm_cpu_data *sd) 2271 { 2272 if (!sev_enabled) 2273 return 0; 2274 2275 sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); 2276 if (!sd->sev_vmcbs) 2277 return -ENOMEM; 2278 2279 return 0; 2280 } 2281 2282 /* 2283 * Pages used by hardware to hold guest encrypted state must be flushed before 2284 * returning them to the system. 2285 */ 2286 static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) 2287 { 2288 int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; 2289 2290 /* 2291 * Note! The address must be a kernel address, as regular page walk 2292 * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user 2293 * address is non-deterministic and unsafe. This function deliberately 2294 * takes a pointer to deter passing in a user address. 2295 */ 2296 unsigned long addr = (unsigned long)va; 2297 2298 /* 2299 * If CPU enforced cache coherency for encrypted mappings of the 2300 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache 2301 * flush is still needed in order to work properly with DMA devices. 2302 */ 2303 if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { 2304 clflush_cache_range(va, PAGE_SIZE); 2305 return; 2306 } 2307 2308 /* 2309 * VM Page Flush takes a host virtual address and a guest ASID. Fall 2310 * back to WBINVD if this faults so as not to make any problems worse 2311 * by leaving stale encrypted data in the cache. 
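	 * (The MSR write encodes both the page's host virtual address and the
	 * guest ASID in a single value, i.e. the "addr | asid" below.)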
2312 */ 2313 if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) 2314 goto do_wbinvd; 2315 2316 return; 2317 2318 do_wbinvd: 2319 wbinvd_on_all_cpus(); 2320 } 2321 2322 void sev_guest_memory_reclaimed(struct kvm *kvm) 2323 { 2324 if (!sev_guest(kvm)) 2325 return; 2326 2327 wbinvd_on_all_cpus(); 2328 } 2329 2330 void sev_free_vcpu(struct kvm_vcpu *vcpu) 2331 { 2332 struct vcpu_svm *svm; 2333 2334 if (!sev_es_guest(vcpu->kvm)) 2335 return; 2336 2337 svm = to_svm(vcpu); 2338 2339 if (vcpu->arch.guest_state_protected) 2340 sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); 2341 2342 __free_page(virt_to_page(svm->sev_es.vmsa)); 2343 2344 if (svm->sev_es.ghcb_sa_free) 2345 kvfree(svm->sev_es.ghcb_sa); 2346 } 2347 2348 static void dump_ghcb(struct vcpu_svm *svm) 2349 { 2350 struct ghcb *ghcb = svm->sev_es.ghcb; 2351 unsigned int nbits; 2352 2353 /* Re-use the dump_invalid_vmcb module parameter */ 2354 if (!dump_invalid_vmcb) { 2355 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); 2356 return; 2357 } 2358 2359 nbits = sizeof(ghcb->save.valid_bitmap) * 8; 2360 2361 pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); 2362 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", 2363 ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); 2364 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", 2365 ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); 2366 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", 2367 ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); 2368 pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", 2369 ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); 2370 pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); 2371 } 2372 2373 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) 2374 { 2375 struct kvm_vcpu *vcpu = &svm->vcpu; 2376 struct ghcb *ghcb = svm->sev_es.ghcb; 2377 2378 /* 2379 * The GHCB protocol so far allows for the following data 2380 * to be returned: 2381 * GPRs RAX, RBX, RCX, RDX 2382 * 2383 * Copy their values, even if they may not have been written during the 2384 * VM-Exit. It's the guest's responsibility to not consume random data. 2385 */ 2386 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); 2387 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); 2388 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); 2389 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); 2390 } 2391 2392 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) 2393 { 2394 struct vmcb_control_area *control = &svm->vmcb->control; 2395 struct kvm_vcpu *vcpu = &svm->vcpu; 2396 struct ghcb *ghcb = svm->sev_es.ghcb; 2397 u64 exit_code; 2398 2399 /* 2400 * The GHCB protocol so far allows for the following data 2401 * to be supplied: 2402 * GPRs RAX, RBX, RCX, RDX 2403 * XCR0 2404 * CPL 2405 * 2406 * VMMCALL allows the guest to provide extra registers. KVM also 2407 * expects RSI for hypercalls, so include that, too. 2408 * 2409 * Copy their values to the appropriate location if supplied. 
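	 *
	 * The vCPU register cache is cleared first (see the memset() below)
	 * so that only state the guest actually marked valid in the GHCB is
	 * consumed; everything else reads back as zero.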
2410 */ 2411 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); 2412 2413 vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); 2414 vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); 2415 vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); 2416 vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); 2417 vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); 2418 2419 svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); 2420 2421 if (ghcb_xcr0_is_valid(ghcb)) { 2422 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); 2423 kvm_update_cpuid_runtime(vcpu); 2424 } 2425 2426 /* Copy the GHCB exit information into the VMCB fields */ 2427 exit_code = ghcb_get_sw_exit_code(ghcb); 2428 control->exit_code = lower_32_bits(exit_code); 2429 control->exit_code_hi = upper_32_bits(exit_code); 2430 control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); 2431 control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); 2432 2433 /* Clear the valid entries fields */ 2434 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 2435 } 2436 2437 static int sev_es_validate_vmgexit(struct vcpu_svm *svm) 2438 { 2439 struct kvm_vcpu *vcpu; 2440 struct ghcb *ghcb; 2441 u64 exit_code; 2442 u64 reason; 2443 2444 ghcb = svm->sev_es.ghcb; 2445 2446 /* 2447 * Retrieve the exit code now even though it may not be marked valid 2448 * as it could help with debugging. 2449 */ 2450 exit_code = ghcb_get_sw_exit_code(ghcb); 2451 2452 /* Only GHCB Usage code 0 is supported */ 2453 if (ghcb->ghcb_usage) { 2454 reason = GHCB_ERR_INVALID_USAGE; 2455 goto vmgexit_err; 2456 } 2457 2458 reason = GHCB_ERR_MISSING_INPUT; 2459 2460 if (!ghcb_sw_exit_code_is_valid(ghcb) || 2461 !ghcb_sw_exit_info_1_is_valid(ghcb) || 2462 !ghcb_sw_exit_info_2_is_valid(ghcb)) 2463 goto vmgexit_err; 2464 2465 switch (ghcb_get_sw_exit_code(ghcb)) { 2466 case SVM_EXIT_READ_DR7: 2467 break; 2468 case SVM_EXIT_WRITE_DR7: 2469 if (!ghcb_rax_is_valid(ghcb)) 2470 goto vmgexit_err; 2471 break; 2472 case SVM_EXIT_RDTSC: 2473 break; 2474 case SVM_EXIT_RDPMC: 2475 if (!ghcb_rcx_is_valid(ghcb)) 2476 goto vmgexit_err; 2477 break; 2478 case SVM_EXIT_CPUID: 2479 if (!ghcb_rax_is_valid(ghcb) || 2480 !ghcb_rcx_is_valid(ghcb)) 2481 goto vmgexit_err; 2482 if (ghcb_get_rax(ghcb) == 0xd) 2483 if (!ghcb_xcr0_is_valid(ghcb)) 2484 goto vmgexit_err; 2485 break; 2486 case SVM_EXIT_INVD: 2487 break; 2488 case SVM_EXIT_IOIO: 2489 if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { 2490 if (!ghcb_sw_scratch_is_valid(ghcb)) 2491 goto vmgexit_err; 2492 } else { 2493 if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) 2494 if (!ghcb_rax_is_valid(ghcb)) 2495 goto vmgexit_err; 2496 } 2497 break; 2498 case SVM_EXIT_MSR: 2499 if (!ghcb_rcx_is_valid(ghcb)) 2500 goto vmgexit_err; 2501 if (ghcb_get_sw_exit_info_1(ghcb)) { 2502 if (!ghcb_rax_is_valid(ghcb) || 2503 !ghcb_rdx_is_valid(ghcb)) 2504 goto vmgexit_err; 2505 } 2506 break; 2507 case SVM_EXIT_VMMCALL: 2508 if (!ghcb_rax_is_valid(ghcb) || 2509 !ghcb_cpl_is_valid(ghcb)) 2510 goto vmgexit_err; 2511 break; 2512 case SVM_EXIT_RDTSCP: 2513 break; 2514 case SVM_EXIT_WBINVD: 2515 break; 2516 case SVM_EXIT_MONITOR: 2517 if (!ghcb_rax_is_valid(ghcb) || 2518 !ghcb_rcx_is_valid(ghcb) || 2519 !ghcb_rdx_is_valid(ghcb)) 2520 goto vmgexit_err; 2521 break; 2522 case SVM_EXIT_MWAIT: 2523 if (!ghcb_rax_is_valid(ghcb) || 2524 !ghcb_rcx_is_valid(ghcb)) 2525 goto vmgexit_err; 2526 break; 2527 case SVM_VMGEXIT_MMIO_READ: 2528 case SVM_VMGEXIT_MMIO_WRITE: 2529 if (!ghcb_sw_scratch_is_valid(ghcb)) 2530 goto vmgexit_err; 
2531 break; 2532 case SVM_VMGEXIT_NMI_COMPLETE: 2533 case SVM_VMGEXIT_AP_HLT_LOOP: 2534 case SVM_VMGEXIT_AP_JUMP_TABLE: 2535 case SVM_VMGEXIT_UNSUPPORTED_EVENT: 2536 break; 2537 default: 2538 reason = GHCB_ERR_INVALID_EVENT; 2539 goto vmgexit_err; 2540 } 2541 2542 return 0; 2543 2544 vmgexit_err: 2545 vcpu = &svm->vcpu; 2546 2547 if (reason == GHCB_ERR_INVALID_USAGE) { 2548 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", 2549 ghcb->ghcb_usage); 2550 } else if (reason == GHCB_ERR_INVALID_EVENT) { 2551 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", 2552 exit_code); 2553 } else { 2554 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", 2555 exit_code); 2556 dump_ghcb(svm); 2557 } 2558 2559 /* Clear the valid entries fields */ 2560 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 2561 2562 ghcb_set_sw_exit_info_1(ghcb, 2); 2563 ghcb_set_sw_exit_info_2(ghcb, reason); 2564 2565 /* Resume the guest to "return" the error code. */ 2566 return 1; 2567 } 2568 2569 void sev_es_unmap_ghcb(struct vcpu_svm *svm) 2570 { 2571 if (!svm->sev_es.ghcb) 2572 return; 2573 2574 if (svm->sev_es.ghcb_sa_free) { 2575 /* 2576 * The scratch area lives outside the GHCB, so there is a 2577 * buffer that, depending on the operation performed, may 2578 * need to be synced, then freed. 2579 */ 2580 if (svm->sev_es.ghcb_sa_sync) { 2581 kvm_write_guest(svm->vcpu.kvm, 2582 ghcb_get_sw_scratch(svm->sev_es.ghcb), 2583 svm->sev_es.ghcb_sa, 2584 svm->sev_es.ghcb_sa_len); 2585 svm->sev_es.ghcb_sa_sync = false; 2586 } 2587 2588 kvfree(svm->sev_es.ghcb_sa); 2589 svm->sev_es.ghcb_sa = NULL; 2590 svm->sev_es.ghcb_sa_free = false; 2591 } 2592 2593 trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); 2594 2595 sev_es_sync_to_ghcb(svm); 2596 2597 kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true); 2598 svm->sev_es.ghcb = NULL; 2599 } 2600 2601 void pre_sev_run(struct vcpu_svm *svm, int cpu) 2602 { 2603 struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); 2604 int asid = sev_get_asid(svm->vcpu.kvm); 2605 2606 /* Assign the asid allocated with this SEV guest */ 2607 svm->asid = asid; 2608 2609 /* 2610 * Flush guest TLB: 2611 * 2612 * 1) when different VMCB for the same ASID is to be run on the same host CPU. 2613 * 2) or this VMCB was executed on different host CPU in previous VMRUNs. 
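	 *
	 * sd->sev_vmcbs[] records the last VMCB run with each ASID on this
	 * CPU, so a mismatch below (or a last VM-Entry on a different CPU)
	 * means the TLB may hold stale entries for this ASID.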
2614 */ 2615 if (sd->sev_vmcbs[asid] == svm->vmcb && 2616 svm->vcpu.arch.last_vmentry_cpu == cpu) 2617 return; 2618 2619 sd->sev_vmcbs[asid] = svm->vmcb; 2620 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 2621 vmcb_mark_dirty(svm->vmcb, VMCB_ASID); 2622 } 2623 2624 #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) 2625 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) 2626 { 2627 struct vmcb_control_area *control = &svm->vmcb->control; 2628 struct ghcb *ghcb = svm->sev_es.ghcb; 2629 u64 ghcb_scratch_beg, ghcb_scratch_end; 2630 u64 scratch_gpa_beg, scratch_gpa_end; 2631 void *scratch_va; 2632 2633 scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); 2634 if (!scratch_gpa_beg) { 2635 pr_err("vmgexit: scratch gpa not provided\n"); 2636 goto e_scratch; 2637 } 2638 2639 scratch_gpa_end = scratch_gpa_beg + len; 2640 if (scratch_gpa_end < scratch_gpa_beg) { 2641 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", 2642 len, scratch_gpa_beg); 2643 goto e_scratch; 2644 } 2645 2646 if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { 2647 /* Scratch area begins within GHCB */ 2648 ghcb_scratch_beg = control->ghcb_gpa + 2649 offsetof(struct ghcb, shared_buffer); 2650 ghcb_scratch_end = control->ghcb_gpa + 2651 offsetof(struct ghcb, reserved_1); 2652 2653 /* 2654 * If the scratch area begins within the GHCB, it must be 2655 * completely contained in the GHCB shared buffer area. 2656 */ 2657 if (scratch_gpa_beg < ghcb_scratch_beg || 2658 scratch_gpa_end > ghcb_scratch_end) { 2659 pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", 2660 scratch_gpa_beg, scratch_gpa_end); 2661 goto e_scratch; 2662 } 2663 2664 scratch_va = (void *)svm->sev_es.ghcb; 2665 scratch_va += (scratch_gpa_beg - control->ghcb_gpa); 2666 } else { 2667 /* 2668 * The guest memory must be read into a kernel buffer, so 2669 * limit the size 2670 */ 2671 if (len > GHCB_SCRATCH_AREA_LIMIT) { 2672 pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", 2673 len, GHCB_SCRATCH_AREA_LIMIT); 2674 goto e_scratch; 2675 } 2676 scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); 2677 if (!scratch_va) 2678 return -ENOMEM; 2679 2680 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { 2681 /* Unable to copy scratch area from guest */ 2682 pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); 2683 2684 kvfree(scratch_va); 2685 return -EFAULT; 2686 } 2687 2688 /* 2689 * The scratch area is outside the GHCB. The operation will 2690 * dictate whether the buffer needs to be synced before running 2691 * the vCPU next time (i.e. a read was requested so the data 2692 * must be written back to the guest memory). 
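		 * The write-back and the kvfree() of this buffer are handled
		 * by sev_es_unmap_ghcb().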
2693 */ 2694 svm->sev_es.ghcb_sa_sync = sync; 2695 svm->sev_es.ghcb_sa_free = true; 2696 } 2697 2698 svm->sev_es.ghcb_sa = scratch_va; 2699 svm->sev_es.ghcb_sa_len = len; 2700 2701 return 0; 2702 2703 e_scratch: 2704 ghcb_set_sw_exit_info_1(ghcb, 2); 2705 ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); 2706 2707 return 1; 2708 } 2709 2710 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, 2711 unsigned int pos) 2712 { 2713 svm->vmcb->control.ghcb_gpa &= ~(mask << pos); 2714 svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; 2715 } 2716 2717 static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) 2718 { 2719 return (svm->vmcb->control.ghcb_gpa >> pos) & mask; 2720 } 2721 2722 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) 2723 { 2724 svm->vmcb->control.ghcb_gpa = value; 2725 } 2726 2727 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) 2728 { 2729 struct vmcb_control_area *control = &svm->vmcb->control; 2730 struct kvm_vcpu *vcpu = &svm->vcpu; 2731 u64 ghcb_info; 2732 int ret = 1; 2733 2734 ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; 2735 2736 trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, 2737 control->ghcb_gpa); 2738 2739 switch (ghcb_info) { 2740 case GHCB_MSR_SEV_INFO_REQ: 2741 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, 2742 GHCB_VERSION_MIN, 2743 sev_enc_bit)); 2744 break; 2745 case GHCB_MSR_CPUID_REQ: { 2746 u64 cpuid_fn, cpuid_reg, cpuid_value; 2747 2748 cpuid_fn = get_ghcb_msr_bits(svm, 2749 GHCB_MSR_CPUID_FUNC_MASK, 2750 GHCB_MSR_CPUID_FUNC_POS); 2751 2752 /* Initialize the registers needed by the CPUID intercept */ 2753 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; 2754 vcpu->arch.regs[VCPU_REGS_RCX] = 0; 2755 2756 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID); 2757 if (!ret) { 2758 /* Error, keep GHCB MSR value as-is */ 2759 break; 2760 } 2761 2762 cpuid_reg = get_ghcb_msr_bits(svm, 2763 GHCB_MSR_CPUID_REG_MASK, 2764 GHCB_MSR_CPUID_REG_POS); 2765 if (cpuid_reg == 0) 2766 cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; 2767 else if (cpuid_reg == 1) 2768 cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; 2769 else if (cpuid_reg == 2) 2770 cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; 2771 else 2772 cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; 2773 2774 set_ghcb_msr_bits(svm, cpuid_value, 2775 GHCB_MSR_CPUID_VALUE_MASK, 2776 GHCB_MSR_CPUID_VALUE_POS); 2777 2778 set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, 2779 GHCB_MSR_INFO_MASK, 2780 GHCB_MSR_INFO_POS); 2781 break; 2782 } 2783 case GHCB_MSR_TERM_REQ: { 2784 u64 reason_set, reason_code; 2785 2786 reason_set = get_ghcb_msr_bits(svm, 2787 GHCB_MSR_TERM_REASON_SET_MASK, 2788 GHCB_MSR_TERM_REASON_SET_POS); 2789 reason_code = get_ghcb_msr_bits(svm, 2790 GHCB_MSR_TERM_REASON_MASK, 2791 GHCB_MSR_TERM_REASON_POS); 2792 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", 2793 reason_set, reason_code); 2794 2795 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 2796 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM; 2797 vcpu->run->system_event.ndata = 1; 2798 vcpu->run->system_event.data[0] = control->ghcb_gpa; 2799 2800 return 0; 2801 } 2802 default: 2803 /* Error, keep GHCB MSR value as-is */ 2804 break; 2805 } 2806 2807 trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, 2808 control->ghcb_gpa, ret); 2809 2810 return ret; 2811 } 2812 2813 int sev_handle_vmgexit(struct kvm_vcpu *vcpu) 2814 { 2815 struct vcpu_svm *svm = to_svm(vcpu); 2816 struct vmcb_control_area *control = &svm->vmcb->control; 2817 u64 ghcb_gpa, 
exit_code; 2818 struct ghcb *ghcb; 2819 int ret; 2820 2821 /* Validate the GHCB */ 2822 ghcb_gpa = control->ghcb_gpa; 2823 if (ghcb_gpa & GHCB_MSR_INFO_MASK) 2824 return sev_handle_vmgexit_msr_protocol(svm); 2825 2826 if (!ghcb_gpa) { 2827 vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n"); 2828 2829 /* Without a GHCB, just return right back to the guest */ 2830 return 1; 2831 } 2832 2833 if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { 2834 /* Unable to map GHCB from guest */ 2835 vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", 2836 ghcb_gpa); 2837 2838 /* Without a GHCB, just return right back to the guest */ 2839 return 1; 2840 } 2841 2842 svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; 2843 ghcb = svm->sev_es.ghcb_map.hva; 2844 2845 trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb); 2846 2847 exit_code = ghcb_get_sw_exit_code(ghcb); 2848 2849 ret = sev_es_validate_vmgexit(svm); 2850 if (ret) 2851 return ret; 2852 2853 sev_es_sync_from_ghcb(svm); 2854 ghcb_set_sw_exit_info_1(ghcb, 0); 2855 ghcb_set_sw_exit_info_2(ghcb, 0); 2856 2857 switch (exit_code) { 2858 case SVM_VMGEXIT_MMIO_READ: 2859 ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); 2860 if (ret) 2861 break; 2862 2863 ret = kvm_sev_es_mmio_read(vcpu, 2864 control->exit_info_1, 2865 control->exit_info_2, 2866 svm->sev_es.ghcb_sa); 2867 break; 2868 case SVM_VMGEXIT_MMIO_WRITE: 2869 ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); 2870 if (ret) 2871 break; 2872 2873 ret = kvm_sev_es_mmio_write(vcpu, 2874 control->exit_info_1, 2875 control->exit_info_2, 2876 svm->sev_es.ghcb_sa); 2877 break; 2878 case SVM_VMGEXIT_NMI_COMPLETE: 2879 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET); 2880 break; 2881 case SVM_VMGEXIT_AP_HLT_LOOP: 2882 ret = kvm_emulate_ap_reset_hold(vcpu); 2883 break; 2884 case SVM_VMGEXIT_AP_JUMP_TABLE: { 2885 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 2886 2887 switch (control->exit_info_1) { 2888 case 0: 2889 /* Set AP jump table address */ 2890 sev->ap_jump_table = control->exit_info_2; 2891 break; 2892 case 1: 2893 /* Get AP jump table address */ 2894 ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table); 2895 break; 2896 default: 2897 pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", 2898 control->exit_info_1); 2899 ghcb_set_sw_exit_info_1(ghcb, 2); 2900 ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); 2901 } 2902 2903 ret = 1; 2904 break; 2905 } 2906 case SVM_VMGEXIT_UNSUPPORTED_EVENT: 2907 vcpu_unimpl(vcpu, 2908 "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", 2909 control->exit_info_1, control->exit_info_2); 2910 ret = -EINVAL; 2911 break; 2912 default: 2913 ret = svm_invoke_exit_handler(vcpu, exit_code); 2914 } 2915 2916 return ret; 2917 } 2918 2919 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) 2920 { 2921 int count; 2922 int bytes; 2923 int r; 2924 2925 if (svm->vmcb->control.exit_info_2 > INT_MAX) 2926 return -EINVAL; 2927 2928 count = svm->vmcb->control.exit_info_2; 2929 if (unlikely(check_mul_overflow(count, size, &bytes))) 2930 return -EINVAL; 2931 2932 r = setup_vmgexit_scratch(svm, in, bytes); 2933 if (r) 2934 return r; 2935 2936 return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, 2937 count, in); 2938 } 2939 2940 static void sev_es_init_vmcb(struct vcpu_svm *svm) 2941 { 2942 struct kvm_vcpu *vcpu = &svm->vcpu; 2943 2944 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; 2945 
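	/*
	 * The remainder of the SEV-ES setup: enable LBR virtualization for
	 * the guest, point the VMCB at the separate encrypted VMSA page, and
	 * adjust intercepts for register state that KVM can no longer read
	 * or modify directly.
	 */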
	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;

	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 */
	svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);

	/* Track EFER/CR register changes */
	svm_set_intercept(svm, TRAP_EFER_WRITE);
	svm_set_intercept(svm, TRAP_CR0_WRITE);
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);

	/* Clear intercepts on selected MSRs */
	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);

	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
	    (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
	     guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
		set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
		if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
			svm_clr_intercept(svm, INTERCEPT_RDTSCP);
	}
}

void sev_init_vmcb(struct vcpu_svm *svm)
{
	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
	clr_exception_intercept(svm, UD_VECTOR);

	if (sev_es_guest(svm->vcpu.kvm))
		sev_es_init_vmcb(svm);
}

void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
	/*
	 * Set the GHCB MSR value as per the GHCB specification when emulating
	 * vCPU RESET for an SEV-ES guest.
	 */
	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
					    GHCB_VERSION_MIN,
					    sev_enc_bit));
}

void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
	/*
	 * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
	 * of which one step is to perform a VMLOAD. KVM performs the
	 * corresponding VMSAVE in svm_prepare_guest_switch for both
	 * traditional and SEV-ES guests.
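	 *
	 * XCR0, PKRU and MSR_IA32_XSS are not part of the VMSAVE/VMLOAD state;
	 * hardware restores them on VMEXIT from the host save area, so the
	 * current host values must be stashed there before entering the guest.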
	 */

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* First SIPI: Use the values as initially set by the VMM */
	if (!svm->sev_es.received_first_sipi) {
		svm->sev_es.received_first_sipi = true;
		return;
	}

	/*
	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
	 * non-zero value.
	 */
	if (!svm->sev_es.ghcb)
		return;

	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
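/*
 * Illustrative sketch (not part of the driver): how the GHCB MSR protocol
 * values handled above are packed. The guest and the hypervisor exchange
 * requests/responses through the GHCB MSR by placing an info code in the
 * low bits (GHCB_MSR_INFO_MASK at GHCB_MSR_INFO_POS) and the payload in the
 * remaining bits, mirroring the pattern used by set_ghcb_msr_bits() and
 * get_ghcb_msr_bits(). The helper below is hypothetical, shows only the bit
 * layout for a CPUID response, and is compiled out.
 */
#if 0
static u64 example_ghcb_msr_cpuid_resp(u64 ghcb_gpa, u64 cpuid_value)
{
	/* Place the CPUID result in the value field... */
	ghcb_gpa &= ~(GHCB_MSR_CPUID_VALUE_MASK << GHCB_MSR_CPUID_VALUE_POS);
	ghcb_gpa |= (cpuid_value & GHCB_MSR_CPUID_VALUE_MASK) << GHCB_MSR_CPUID_VALUE_POS;

	/* ...and tag the MSR image as a CPUID response in the info field. */
	ghcb_gpa &= ~(GHCB_MSR_INFO_MASK << GHCB_MSR_INFO_POS);
	ghcb_gpa |= (GHCB_MSR_CPUID_RESP & GHCB_MSR_INFO_MASK) << GHCB_MSR_INFO_POS;

	return ghcb_gpa;
}
#endif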