// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>

#define DR7_RESET_VALUE 0x400

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb __initdata *boot_ghcb;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data *, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}

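/* Raw read/write accessors for the GHCB MSR (MSR_AMD64_SEV_ES_GHCB). */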
static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}

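/* Write @size bytes from @buf to the emulated instruction's target address @dst. */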
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whichever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}

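/* Counterpart to vc_write_mem(): read @size bytes from @src into @buf. */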
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whichever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"

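/*
 * Release the GHCB acquired via __sev_get_ghcb(). If the backup GHCB was
 * handed out, restore the saved contents of the real GHCB from it; otherwise
 * invalidate the GHCB and mark it as no longer in use.
 */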
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}

static u64 get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}

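/*
 * Emulate RDMSR/WRMSR: forward the relevant register state to the hypervisor
 * via the GHCB and, for RDMSR, copy the returned RAX/RDX values back into the
 * guest registers.
 */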
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	u64 exit_info_1;

	/* Is it a WRMSR? */
	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;

	ghcb_set_rcx(ghcb, regs->cx);
	if (exit_info_1) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
				  exit_info_1, 0);

	if ((ret == ES_OK) && (!exit_info_1)) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}

/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 */
static bool __init sev_es_setup_ghcb(void)
{
	/* First make sure the hypervisor talks a supported protocol. */
	if (!sev_es_negotiate_protocol())
		return false;

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	return true;
}

#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	start_cpu0();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/* Enable SEV-ES special handling */
	static_branch_enable(&sev_es_enable_key);

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

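/*
 * Perform a single emulated MMIO access of up to 8 bytes: translate the
 * target virtual address to a physical address and let the hypervisor do the
 * read or write through the GHCB shared buffer.
 */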
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
}

static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
						 struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	int sign_byte;
	long *reg_data;

	switch (insn->opcode.bytes[1]) {
	/* MMIO Read w/ zero-extension */
	case 0xb6:
		bytes = 1;
		fallthrough;
	case 0xb7:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memset(reg_data, 0, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MMIO Read w/ sign-extension */
	case 0xbe:
		bytes = 1;
		fallthrough;
	case 0xbf:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Sign extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}
		memset(reg_data, sign_byte, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}

static enum es_result vc_handle_mmio(struct ghcb *ghcb,
				     struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	long *reg_data;

	switch (insn->opcode.bytes[0]) {
	/* MMIO Write */
	case 0x88:
		bytes = 1;
		fallthrough;
	case 0x89:
		if (!bytes)
			bytes = insn->opnd_bytes;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memcpy(ghcb->shared_buffer, reg_data, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	case 0xc6:
		bytes = 1;
		fallthrough;
	case 0xc7:
		if (!bytes)
			bytes = insn->opnd_bytes;

		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	/* MMIO Read */
	case 0x8a:
		bytes = 1;
		fallthrough;
	case 0x8b:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MOVS instruction */
	case 0xa4:
		bytes = 1;
		fallthrough;
	case 0xa5:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;

	/* Two-Byte Opcodes */
	case 0x0f:
		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
		break;
	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

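/*
 * DR7 accesses are intercepted and never reach the hardware register in an
 * SEV-ES guest. Writes are forwarded to the hypervisor and cached per CPU so
 * that subsequent DR7 reads return the last value written.
 */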
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;
	prev_sp = regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
	 */
	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
}

static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;
	bool ret = true;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	__sev_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

	return ret;
}

static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to a bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}

/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}

bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	/* Do initial setup or terminate the guest */
	if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	while (true)
		halt();
}