1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * AMD Memory Encryption Support 4 * 5 * Copyright (C) 2019 SUSE 6 * 7 * Author: Joerg Roedel <jroedel@suse.de> 8 */ 9 10 #define pr_fmt(fmt) "SEV: " fmt 11 12 #include <linux/sched/debug.h> /* For show_regs() */ 13 #include <linux/percpu-defs.h> 14 #include <linux/mem_encrypt.h> 15 #include <linux/printk.h> 16 #include <linux/mm_types.h> 17 #include <linux/set_memory.h> 18 #include <linux/memblock.h> 19 #include <linux/kernel.h> 20 #include <linux/mm.h> 21 22 #include <asm/cpu_entry_area.h> 23 #include <asm/stacktrace.h> 24 #include <asm/sev.h> 25 #include <asm/insn-eval.h> 26 #include <asm/fpu/internal.h> 27 #include <asm/processor.h> 28 #include <asm/realmode.h> 29 #include <asm/traps.h> 30 #include <asm/svm.h> 31 #include <asm/smp.h> 32 #include <asm/cpu.h> 33 34 #define DR7_RESET_VALUE 0x400 35 36 /* For early boot hypervisor communication in SEV-ES enabled guests */ 37 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); 38 39 /* 40 * Needs to be in the .data section because we need it NULL before bss is 41 * cleared 42 */ 43 static struct ghcb __initdata *boot_ghcb; 44 45 /* #VC handler runtime per-CPU data */ 46 struct sev_es_runtime_data { 47 struct ghcb ghcb_page; 48 49 /* Physical storage for the per-CPU IST stack of the #VC handler */ 50 char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); 51 52 /* 53 * Physical storage for the per-CPU fall-back stack of the #VC handler. 54 * The fall-back stack is used when it is not safe to switch back to the 55 * interrupted stack in the #VC entry code. 56 */ 57 char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); 58 59 /* 60 * Reserve one page per CPU as backup storage for the unencrypted GHCB. 61 * It is needed when an NMI happens while the #VC handler uses the real 62 * GHCB, and the NMI handler itself is causing another #VC exception. In 63 * that case the GHCB content of the first handler needs to be backed up 64 * and restored. 65 */ 66 struct ghcb backup_ghcb; 67 68 /* 69 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. 70 * There is no need for it to be atomic, because nothing is written to 71 * the GHCB between the read and the write of ghcb_active. So it is safe 72 * to use it when a nested #VC exception happens before the write. 73 * 74 * This is necessary for example in the #VC->NMI->#VC case when the NMI 75 * happens while the first #VC handler uses the GHCB. When the NMI code 76 * raises a second #VC handler it might overwrite the contents of the 77 * GHCB written by the first handler. To avoid this the content of the 78 * GHCB is saved and restored when the GHCB is detected to be in use 79 * already. 80 */ 81 bool ghcb_active; 82 bool backup_ghcb_active; 83 84 /* 85 * Cached DR7 value - write it on DR7 writes and return it on reads. 86 * That value will never make it to the real hardware DR7 as debugging 87 * is currently unsupported in SEV-ES guests. 88 */ 89 unsigned long dr7; 90 }; 91 92 struct ghcb_state { 93 struct ghcb *ghcb; 94 }; 95 96 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); 97 DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); 98 99 /* Needed in vc_early_forward_exception */ 100 void do_early_exception(struct pt_regs *regs, int trapnr); 101 102 static void __init setup_vc_stacks(int cpu) 103 { 104 struct sev_es_runtime_data *data; 105 struct cpu_entry_area *cea; 106 unsigned long vaddr; 107 phys_addr_t pa; 108 109 data = per_cpu(runtime_data, cpu); 110 cea = get_cpu_entry_area(cpu); 111 112 /* Map #VC IST stack */ 113 vaddr = CEA_ESTACK_BOT(&cea->estacks, VC); 114 pa = __pa(data->ist_stack); 115 cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); 116 117 /* Map VC fall-back stack */ 118 vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2); 119 pa = __pa(data->fallback_stack); 120 cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); 121 } 122 123 static __always_inline bool on_vc_stack(struct pt_regs *regs) 124 { 125 unsigned long sp = regs->sp; 126 127 /* User-mode RSP is not trusted */ 128 if (user_mode(regs)) 129 return false; 130 131 /* SYSCALL gap still has user-mode RSP */ 132 if (ip_within_syscall_gap(regs)) 133 return false; 134 135 return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); 136 } 137 138 /* 139 * This function handles the case when an NMI is raised in the #VC 140 * exception handler entry code, before the #VC handler has switched off 141 * its IST stack. In this case, the IST entry for #VC must be adjusted, 142 * so that any nested #VC exception will not overwrite the stack 143 * contents of the interrupted #VC handler. 144 * 145 * The IST entry is adjusted unconditionally so that it can be also be 146 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a 147 * nested sev_es_ist_exit() call may adjust back the IST entry too 148 * early. 149 * 150 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run 151 * on the NMI IST stack, as they are only called from NMI handling code 152 * right now. 153 */ 154 void noinstr __sev_es_ist_enter(struct pt_regs *regs) 155 { 156 unsigned long old_ist, new_ist; 157 158 /* Read old IST entry */ 159 new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 160 161 /* 162 * If NMI happened while on the #VC IST stack, set the new IST 163 * value below regs->sp, so that the interrupted stack frame is 164 * not overwritten by subsequent #VC exceptions. 165 */ 166 if (on_vc_stack(regs)) 167 new_ist = regs->sp; 168 169 /* 170 * Reserve additional 8 bytes and store old IST value so this 171 * adjustment can be unrolled in __sev_es_ist_exit(). 172 */ 173 new_ist -= sizeof(old_ist); 174 *(unsigned long *)new_ist = old_ist; 175 176 /* Set new IST entry */ 177 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); 178 } 179 180 void noinstr __sev_es_ist_exit(void) 181 { 182 unsigned long ist; 183 184 /* Read IST entry */ 185 ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 186 187 if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) 188 return; 189 190 /* Read back old IST entry and write it to the TSS */ 191 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); 192 } 193 194 /* 195 * Nothing shall interrupt this code path while holding the per-CPU 196 * GHCB. The backup GHCB is only for NMIs interrupting this path. 197 * 198 * Callers must disable local interrupts around it. 199 */ 200 static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) 201 { 202 struct sev_es_runtime_data *data; 203 struct ghcb *ghcb; 204 205 WARN_ON(!irqs_disabled()); 206 207 data = this_cpu_read(runtime_data); 208 ghcb = &data->ghcb_page; 209 210 if (unlikely(data->ghcb_active)) { 211 /* GHCB is already in use - save its contents */ 212 213 if (unlikely(data->backup_ghcb_active)) { 214 /* 215 * Backup-GHCB is also already in use. There is no way 216 * to continue here so just kill the machine. To make 217 * panic() work, mark GHCBs inactive so that messages 218 * can be printed out. 219 */ 220 data->ghcb_active = false; 221 data->backup_ghcb_active = false; 222 223 instrumentation_begin(); 224 panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); 225 instrumentation_end(); 226 } 227 228 /* Mark backup_ghcb active before writing to it */ 229 data->backup_ghcb_active = true; 230 231 state->ghcb = &data->backup_ghcb; 232 233 /* Backup GHCB content */ 234 *state->ghcb = *ghcb; 235 } else { 236 state->ghcb = NULL; 237 data->ghcb_active = true; 238 } 239 240 return ghcb; 241 } 242 243 /* Needed in vc_early_forward_exception */ 244 void do_early_exception(struct pt_regs *regs, int trapnr); 245 246 static inline u64 sev_es_rd_ghcb_msr(void) 247 { 248 return __rdmsr(MSR_AMD64_SEV_ES_GHCB); 249 } 250 251 static __always_inline void sev_es_wr_ghcb_msr(u64 val) 252 { 253 u32 low, high; 254 255 low = (u32)(val); 256 high = (u32)(val >> 32); 257 258 native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); 259 } 260 261 static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, 262 unsigned char *buffer) 263 { 264 return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); 265 } 266 267 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) 268 { 269 char buffer[MAX_INSN_SIZE]; 270 int insn_bytes; 271 272 insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); 273 if (insn_bytes == 0) { 274 /* Nothing could be copied */ 275 ctxt->fi.vector = X86_TRAP_PF; 276 ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; 277 ctxt->fi.cr2 = ctxt->regs->ip; 278 return ES_EXCEPTION; 279 } else if (insn_bytes == -EINVAL) { 280 /* Effective RIP could not be calculated */ 281 ctxt->fi.vector = X86_TRAP_GP; 282 ctxt->fi.error_code = 0; 283 ctxt->fi.cr2 = 0; 284 return ES_EXCEPTION; 285 } 286 287 if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) 288 return ES_DECODE_FAILED; 289 290 if (ctxt->insn.immediate.got) 291 return ES_OK; 292 else 293 return ES_DECODE_FAILED; 294 } 295 296 static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) 297 { 298 char buffer[MAX_INSN_SIZE]; 299 int res, ret; 300 301 res = vc_fetch_insn_kernel(ctxt, buffer); 302 if (res) { 303 ctxt->fi.vector = X86_TRAP_PF; 304 ctxt->fi.error_code = X86_PF_INSTR; 305 ctxt->fi.cr2 = ctxt->regs->ip; 306 return ES_EXCEPTION; 307 } 308 309 ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); 310 if (ret < 0) 311 return ES_DECODE_FAILED; 312 else 313 return ES_OK; 314 } 315 316 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) 317 { 318 if (user_mode(ctxt->regs)) 319 return __vc_decode_user_insn(ctxt); 320 else 321 return __vc_decode_kern_insn(ctxt); 322 } 323 324 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, 325 char *dst, char *buf, size_t size) 326 { 327 unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; 328 char __user *target = (char __user *)dst; 329 u64 d8; 330 u32 d4; 331 u16 d2; 332 u8 d1; 333 334 /* 335 * This function uses __put_user() independent of whether kernel or user 336 * memory is accessed. This works fine because __put_user() does no 337 * sanity checks of the pointer being accessed. All that it does is 338 * to report when the access failed. 339 * 340 * Also, this function runs in atomic context, so __put_user() is not 341 * allowed to sleep. The page-fault handler detects that it is running 342 * in atomic context and will not try to take mmap_sem and handle the 343 * fault, so additional pagefault_enable()/disable() calls are not 344 * needed. 345 * 346 * The access can't be done via copy_to_user() here because 347 * vc_write_mem() must not use string instructions to access unsafe 348 * memory. The reason is that MOVS is emulated by the #VC handler by 349 * splitting the move up into a read and a write and taking a nested #VC 350 * exception on whatever of them is the MMIO access. Using string 351 * instructions here would cause infinite nesting. 352 */ 353 switch (size) { 354 case 1: 355 memcpy(&d1, buf, 1); 356 if (__put_user(d1, target)) 357 goto fault; 358 break; 359 case 2: 360 memcpy(&d2, buf, 2); 361 if (__put_user(d2, target)) 362 goto fault; 363 break; 364 case 4: 365 memcpy(&d4, buf, 4); 366 if (__put_user(d4, target)) 367 goto fault; 368 break; 369 case 8: 370 memcpy(&d8, buf, 8); 371 if (__put_user(d8, target)) 372 goto fault; 373 break; 374 default: 375 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 376 return ES_UNSUPPORTED; 377 } 378 379 return ES_OK; 380 381 fault: 382 if (user_mode(ctxt->regs)) 383 error_code |= X86_PF_USER; 384 385 ctxt->fi.vector = X86_TRAP_PF; 386 ctxt->fi.error_code = error_code; 387 ctxt->fi.cr2 = (unsigned long)dst; 388 389 return ES_EXCEPTION; 390 } 391 392 static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, 393 char *src, char *buf, size_t size) 394 { 395 unsigned long error_code = X86_PF_PROT; 396 char __user *s = (char __user *)src; 397 u64 d8; 398 u32 d4; 399 u16 d2; 400 u8 d1; 401 402 /* 403 * This function uses __get_user() independent of whether kernel or user 404 * memory is accessed. This works fine because __get_user() does no 405 * sanity checks of the pointer being accessed. All that it does is 406 * to report when the access failed. 407 * 408 * Also, this function runs in atomic context, so __get_user() is not 409 * allowed to sleep. The page-fault handler detects that it is running 410 * in atomic context and will not try to take mmap_sem and handle the 411 * fault, so additional pagefault_enable()/disable() calls are not 412 * needed. 413 * 414 * The access can't be done via copy_from_user() here because 415 * vc_read_mem() must not use string instructions to access unsafe 416 * memory. The reason is that MOVS is emulated by the #VC handler by 417 * splitting the move up into a read and a write and taking a nested #VC 418 * exception on whatever of them is the MMIO access. Using string 419 * instructions here would cause infinite nesting. 420 */ 421 switch (size) { 422 case 1: 423 if (__get_user(d1, s)) 424 goto fault; 425 memcpy(buf, &d1, 1); 426 break; 427 case 2: 428 if (__get_user(d2, s)) 429 goto fault; 430 memcpy(buf, &d2, 2); 431 break; 432 case 4: 433 if (__get_user(d4, s)) 434 goto fault; 435 memcpy(buf, &d4, 4); 436 break; 437 case 8: 438 if (__get_user(d8, s)) 439 goto fault; 440 memcpy(buf, &d8, 8); 441 break; 442 default: 443 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 444 return ES_UNSUPPORTED; 445 } 446 447 return ES_OK; 448 449 fault: 450 if (user_mode(ctxt->regs)) 451 error_code |= X86_PF_USER; 452 453 ctxt->fi.vector = X86_TRAP_PF; 454 ctxt->fi.error_code = error_code; 455 ctxt->fi.cr2 = (unsigned long)src; 456 457 return ES_EXCEPTION; 458 } 459 460 static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 461 unsigned long vaddr, phys_addr_t *paddr) 462 { 463 unsigned long va = (unsigned long)vaddr; 464 unsigned int level; 465 phys_addr_t pa; 466 pgd_t *pgd; 467 pte_t *pte; 468 469 pgd = __va(read_cr3_pa()); 470 pgd = &pgd[pgd_index(va)]; 471 pte = lookup_address_in_pgd(pgd, va, &level); 472 if (!pte) { 473 ctxt->fi.vector = X86_TRAP_PF; 474 ctxt->fi.cr2 = vaddr; 475 ctxt->fi.error_code = 0; 476 477 if (user_mode(ctxt->regs)) 478 ctxt->fi.error_code |= X86_PF_USER; 479 480 return ES_EXCEPTION; 481 } 482 483 if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) 484 /* Emulated MMIO to/from encrypted memory not supported */ 485 return ES_UNSUPPORTED; 486 487 pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; 488 pa |= va & ~page_level_mask(level); 489 490 *paddr = pa; 491 492 return ES_OK; 493 } 494 495 /* Include code shared with pre-decompression boot stage */ 496 #include "sev-shared.c" 497 498 static noinstr void __sev_put_ghcb(struct ghcb_state *state) 499 { 500 struct sev_es_runtime_data *data; 501 struct ghcb *ghcb; 502 503 WARN_ON(!irqs_disabled()); 504 505 data = this_cpu_read(runtime_data); 506 ghcb = &data->ghcb_page; 507 508 if (state->ghcb) { 509 /* Restore GHCB from Backup */ 510 *ghcb = *state->ghcb; 511 data->backup_ghcb_active = false; 512 state->ghcb = NULL; 513 } else { 514 /* 515 * Invalidate the GHCB so a VMGEXIT instruction issued 516 * from userspace won't appear to be valid. 517 */ 518 vc_ghcb_invalidate(ghcb); 519 data->ghcb_active = false; 520 } 521 } 522 523 void noinstr __sev_es_nmi_complete(void) 524 { 525 struct ghcb_state state; 526 struct ghcb *ghcb; 527 528 ghcb = __sev_get_ghcb(&state); 529 530 vc_ghcb_invalidate(ghcb); 531 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); 532 ghcb_set_sw_exit_info_1(ghcb, 0); 533 ghcb_set_sw_exit_info_2(ghcb, 0); 534 535 sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); 536 VMGEXIT(); 537 538 __sev_put_ghcb(&state); 539 } 540 541 static u64 get_jump_table_addr(void) 542 { 543 struct ghcb_state state; 544 unsigned long flags; 545 struct ghcb *ghcb; 546 u64 ret = 0; 547 548 local_irq_save(flags); 549 550 ghcb = __sev_get_ghcb(&state); 551 552 vc_ghcb_invalidate(ghcb); 553 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); 554 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); 555 ghcb_set_sw_exit_info_2(ghcb, 0); 556 557 sev_es_wr_ghcb_msr(__pa(ghcb)); 558 VMGEXIT(); 559 560 if (ghcb_sw_exit_info_1_is_valid(ghcb) && 561 ghcb_sw_exit_info_2_is_valid(ghcb)) 562 ret = ghcb->save.sw_exit_info_2; 563 564 __sev_put_ghcb(&state); 565 566 local_irq_restore(flags); 567 568 return ret; 569 } 570 571 int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) 572 { 573 u16 startup_cs, startup_ip; 574 phys_addr_t jump_table_pa; 575 u64 jump_table_addr; 576 u16 __iomem *jump_table; 577 578 jump_table_addr = get_jump_table_addr(); 579 580 /* On UP guests there is no jump table so this is not a failure */ 581 if (!jump_table_addr) 582 return 0; 583 584 /* Check if AP Jump Table is page-aligned */ 585 if (jump_table_addr & ~PAGE_MASK) 586 return -EINVAL; 587 588 jump_table_pa = jump_table_addr & PAGE_MASK; 589 590 startup_cs = (u16)(rmh->trampoline_start >> 4); 591 startup_ip = (u16)(rmh->sev_es_trampoline_start - 592 rmh->trampoline_start); 593 594 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); 595 if (!jump_table) 596 return -EIO; 597 598 writew(startup_ip, &jump_table[0]); 599 writew(startup_cs, &jump_table[1]); 600 601 iounmap(jump_table); 602 603 return 0; 604 } 605 606 /* 607 * This is needed by the OVMF UEFI firmware which will use whatever it finds in 608 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu 609 * runtime GHCBs used by the kernel are also mapped in the EFI page-table. 610 */ 611 int __init sev_es_efi_map_ghcbs(pgd_t *pgd) 612 { 613 struct sev_es_runtime_data *data; 614 unsigned long address, pflags; 615 int cpu; 616 u64 pfn; 617 618 if (!sev_es_active()) 619 return 0; 620 621 pflags = _PAGE_NX | _PAGE_RW; 622 623 for_each_possible_cpu(cpu) { 624 data = per_cpu(runtime_data, cpu); 625 626 address = __pa(&data->ghcb_page); 627 pfn = address >> PAGE_SHIFT; 628 629 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) 630 return 1; 631 } 632 633 return 0; 634 } 635 636 static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 637 { 638 struct pt_regs *regs = ctxt->regs; 639 enum es_result ret; 640 u64 exit_info_1; 641 642 /* Is it a WRMSR? */ 643 exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; 644 645 ghcb_set_rcx(ghcb, regs->cx); 646 if (exit_info_1) { 647 ghcb_set_rax(ghcb, regs->ax); 648 ghcb_set_rdx(ghcb, regs->dx); 649 } 650 651 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); 652 653 if ((ret == ES_OK) && (!exit_info_1)) { 654 regs->ax = ghcb->save.rax; 655 regs->dx = ghcb->save.rdx; 656 } 657 658 return ret; 659 } 660 661 /* 662 * This function runs on the first #VC exception after the kernel 663 * switched to virtual addresses. 664 */ 665 static bool __init sev_es_setup_ghcb(void) 666 { 667 /* First make sure the hypervisor talks a supported protocol. */ 668 if (!sev_es_negotiate_protocol()) 669 return false; 670 671 /* 672 * Clear the boot_ghcb. The first exception comes in before the bss 673 * section is cleared. 674 */ 675 memset(&boot_ghcb_page, 0, PAGE_SIZE); 676 677 /* Alright - Make the boot-ghcb public */ 678 boot_ghcb = &boot_ghcb_page; 679 680 return true; 681 } 682 683 #ifdef CONFIG_HOTPLUG_CPU 684 static void sev_es_ap_hlt_loop(void) 685 { 686 struct ghcb_state state; 687 struct ghcb *ghcb; 688 689 ghcb = __sev_get_ghcb(&state); 690 691 while (true) { 692 vc_ghcb_invalidate(ghcb); 693 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); 694 ghcb_set_sw_exit_info_1(ghcb, 0); 695 ghcb_set_sw_exit_info_2(ghcb, 0); 696 697 sev_es_wr_ghcb_msr(__pa(ghcb)); 698 VMGEXIT(); 699 700 /* Wakeup signal? */ 701 if (ghcb_sw_exit_info_2_is_valid(ghcb) && 702 ghcb->save.sw_exit_info_2) 703 break; 704 } 705 706 __sev_put_ghcb(&state); 707 } 708 709 /* 710 * Play_dead handler when running under SEV-ES. This is needed because 711 * the hypervisor can't deliver an SIPI request to restart the AP. 712 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the 713 * hypervisor wakes it up again. 714 */ 715 static void sev_es_play_dead(void) 716 { 717 play_dead_common(); 718 719 /* IRQs now disabled */ 720 721 sev_es_ap_hlt_loop(); 722 723 /* 724 * If we get here, the VCPU was woken up again. Jump to CPU 725 * startup code to get it back online. 726 */ 727 start_cpu0(); 728 } 729 #else /* CONFIG_HOTPLUG_CPU */ 730 #define sev_es_play_dead native_play_dead 731 #endif /* CONFIG_HOTPLUG_CPU */ 732 733 #ifdef CONFIG_SMP 734 static void __init sev_es_setup_play_dead(void) 735 { 736 smp_ops.play_dead = sev_es_play_dead; 737 } 738 #else 739 static inline void sev_es_setup_play_dead(void) { } 740 #endif 741 742 static void __init alloc_runtime_data(int cpu) 743 { 744 struct sev_es_runtime_data *data; 745 746 data = memblock_alloc(sizeof(*data), PAGE_SIZE); 747 if (!data) 748 panic("Can't allocate SEV-ES runtime data"); 749 750 per_cpu(runtime_data, cpu) = data; 751 } 752 753 static void __init init_ghcb(int cpu) 754 { 755 struct sev_es_runtime_data *data; 756 int err; 757 758 data = per_cpu(runtime_data, cpu); 759 760 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, 761 sizeof(data->ghcb_page)); 762 if (err) 763 panic("Can't map GHCBs unencrypted"); 764 765 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); 766 767 data->ghcb_active = false; 768 data->backup_ghcb_active = false; 769 } 770 771 void __init sev_es_init_vc_handling(void) 772 { 773 int cpu; 774 775 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); 776 777 if (!sev_es_active()) 778 return; 779 780 if (!sev_es_check_cpu_features()) 781 panic("SEV-ES CPU Features missing"); 782 783 /* Enable SEV-ES special handling */ 784 static_branch_enable(&sev_es_enable_key); 785 786 /* Initialize per-cpu GHCB pages */ 787 for_each_possible_cpu(cpu) { 788 alloc_runtime_data(cpu); 789 init_ghcb(cpu); 790 setup_vc_stacks(cpu); 791 } 792 793 sev_es_setup_play_dead(); 794 795 /* Secondary CPUs use the runtime #VC handler */ 796 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; 797 } 798 799 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) 800 { 801 int trapnr = ctxt->fi.vector; 802 803 if (trapnr == X86_TRAP_PF) 804 native_write_cr2(ctxt->fi.cr2); 805 806 ctxt->regs->orig_ax = ctxt->fi.error_code; 807 do_early_exception(ctxt->regs, trapnr); 808 } 809 810 static long *vc_insn_get_reg(struct es_em_ctxt *ctxt) 811 { 812 long *reg_array; 813 int offset; 814 815 reg_array = (long *)ctxt->regs; 816 offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs); 817 818 if (offset < 0) 819 return NULL; 820 821 offset /= sizeof(long); 822 823 return reg_array + offset; 824 } 825 826 static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) 827 { 828 long *reg_array; 829 int offset; 830 831 reg_array = (long *)ctxt->regs; 832 offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); 833 834 if (offset < 0) 835 return NULL; 836 837 offset /= sizeof(long); 838 839 return reg_array + offset; 840 } 841 static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 842 unsigned int bytes, bool read) 843 { 844 u64 exit_code, exit_info_1, exit_info_2; 845 unsigned long ghcb_pa = __pa(ghcb); 846 enum es_result res; 847 phys_addr_t paddr; 848 void __user *ref; 849 850 ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); 851 if (ref == (void __user *)-1L) 852 return ES_UNSUPPORTED; 853 854 exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; 855 856 res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); 857 if (res != ES_OK) { 858 if (res == ES_EXCEPTION && !read) 859 ctxt->fi.error_code |= X86_PF_WRITE; 860 861 return res; 862 } 863 864 exit_info_1 = paddr; 865 /* Can never be greater than 8 */ 866 exit_info_2 = bytes; 867 868 ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); 869 870 return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); 871 } 872 873 static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb, 874 struct es_em_ctxt *ctxt) 875 { 876 struct insn *insn = &ctxt->insn; 877 unsigned int bytes = 0; 878 enum es_result ret; 879 int sign_byte; 880 long *reg_data; 881 882 switch (insn->opcode.bytes[1]) { 883 /* MMIO Read w/ zero-extension */ 884 case 0xb6: 885 bytes = 1; 886 fallthrough; 887 case 0xb7: 888 if (!bytes) 889 bytes = 2; 890 891 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 892 if (ret) 893 break; 894 895 /* Zero extend based on operand size */ 896 reg_data = vc_insn_get_reg(ctxt); 897 if (!reg_data) 898 return ES_DECODE_FAILED; 899 900 memset(reg_data, 0, insn->opnd_bytes); 901 902 memcpy(reg_data, ghcb->shared_buffer, bytes); 903 break; 904 905 /* MMIO Read w/ sign-extension */ 906 case 0xbe: 907 bytes = 1; 908 fallthrough; 909 case 0xbf: 910 if (!bytes) 911 bytes = 2; 912 913 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 914 if (ret) 915 break; 916 917 /* Sign extend based on operand size */ 918 reg_data = vc_insn_get_reg(ctxt); 919 if (!reg_data) 920 return ES_DECODE_FAILED; 921 922 if (bytes == 1) { 923 u8 *val = (u8 *)ghcb->shared_buffer; 924 925 sign_byte = (*val & 0x80) ? 0xff : 0x00; 926 } else { 927 u16 *val = (u16 *)ghcb->shared_buffer; 928 929 sign_byte = (*val & 0x8000) ? 0xff : 0x00; 930 } 931 memset(reg_data, sign_byte, insn->opnd_bytes); 932 933 memcpy(reg_data, ghcb->shared_buffer, bytes); 934 break; 935 936 default: 937 ret = ES_UNSUPPORTED; 938 } 939 940 return ret; 941 } 942 943 /* 944 * The MOVS instruction has two memory operands, which raises the 945 * problem that it is not known whether the access to the source or the 946 * destination caused the #VC exception (and hence whether an MMIO read 947 * or write operation needs to be emulated). 948 * 949 * Instead of playing games with walking page-tables and trying to guess 950 * whether the source or destination is an MMIO range, split the move 951 * into two operations, a read and a write with only one memory operand. 952 * This will cause a nested #VC exception on the MMIO address which can 953 * then be handled. 954 * 955 * This implementation has the benefit that it also supports MOVS where 956 * source _and_ destination are MMIO regions. 957 * 958 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a 959 * rare operation. If it turns out to be a performance problem the split 960 * operations can be moved to memcpy_fromio() and memcpy_toio(). 961 */ 962 static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, 963 unsigned int bytes) 964 { 965 unsigned long ds_base, es_base; 966 unsigned char *src, *dst; 967 unsigned char buffer[8]; 968 enum es_result ret; 969 bool rep; 970 int off; 971 972 ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); 973 es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); 974 975 if (ds_base == -1L || es_base == -1L) { 976 ctxt->fi.vector = X86_TRAP_GP; 977 ctxt->fi.error_code = 0; 978 return ES_EXCEPTION; 979 } 980 981 src = ds_base + (unsigned char *)ctxt->regs->si; 982 dst = es_base + (unsigned char *)ctxt->regs->di; 983 984 ret = vc_read_mem(ctxt, src, buffer, bytes); 985 if (ret != ES_OK) 986 return ret; 987 988 ret = vc_write_mem(ctxt, dst, buffer, bytes); 989 if (ret != ES_OK) 990 return ret; 991 992 if (ctxt->regs->flags & X86_EFLAGS_DF) 993 off = -bytes; 994 else 995 off = bytes; 996 997 ctxt->regs->si += off; 998 ctxt->regs->di += off; 999 1000 rep = insn_has_rep_prefix(&ctxt->insn); 1001 if (rep) 1002 ctxt->regs->cx -= 1; 1003 1004 if (!rep || ctxt->regs->cx == 0) 1005 return ES_OK; 1006 else 1007 return ES_RETRY; 1008 } 1009 1010 static enum es_result vc_handle_mmio(struct ghcb *ghcb, 1011 struct es_em_ctxt *ctxt) 1012 { 1013 struct insn *insn = &ctxt->insn; 1014 unsigned int bytes = 0; 1015 enum es_result ret; 1016 long *reg_data; 1017 1018 switch (insn->opcode.bytes[0]) { 1019 /* MMIO Write */ 1020 case 0x88: 1021 bytes = 1; 1022 fallthrough; 1023 case 0x89: 1024 if (!bytes) 1025 bytes = insn->opnd_bytes; 1026 1027 reg_data = vc_insn_get_reg(ctxt); 1028 if (!reg_data) 1029 return ES_DECODE_FAILED; 1030 1031 memcpy(ghcb->shared_buffer, reg_data, bytes); 1032 1033 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1034 break; 1035 1036 case 0xc6: 1037 bytes = 1; 1038 fallthrough; 1039 case 0xc7: 1040 if (!bytes) 1041 bytes = insn->opnd_bytes; 1042 1043 memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); 1044 1045 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1046 break; 1047 1048 /* MMIO Read */ 1049 case 0x8a: 1050 bytes = 1; 1051 fallthrough; 1052 case 0x8b: 1053 if (!bytes) 1054 bytes = insn->opnd_bytes; 1055 1056 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1057 if (ret) 1058 break; 1059 1060 reg_data = vc_insn_get_reg(ctxt); 1061 if (!reg_data) 1062 return ES_DECODE_FAILED; 1063 1064 /* Zero-extend for 32-bit operation */ 1065 if (bytes == 4) 1066 *reg_data = 0; 1067 1068 memcpy(reg_data, ghcb->shared_buffer, bytes); 1069 break; 1070 1071 /* MOVS instruction */ 1072 case 0xa4: 1073 bytes = 1; 1074 fallthrough; 1075 case 0xa5: 1076 if (!bytes) 1077 bytes = insn->opnd_bytes; 1078 1079 ret = vc_handle_mmio_movs(ctxt, bytes); 1080 break; 1081 /* Two-Byte Opcodes */ 1082 case 0x0f: 1083 ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt); 1084 break; 1085 default: 1086 ret = ES_UNSUPPORTED; 1087 } 1088 1089 return ret; 1090 } 1091 1092 static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, 1093 struct es_em_ctxt *ctxt) 1094 { 1095 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1096 long val, *reg = vc_insn_get_rm(ctxt); 1097 enum es_result ret; 1098 1099 if (!reg) 1100 return ES_DECODE_FAILED; 1101 1102 val = *reg; 1103 1104 /* Upper 32 bits must be written as zeroes */ 1105 if (val >> 32) { 1106 ctxt->fi.vector = X86_TRAP_GP; 1107 ctxt->fi.error_code = 0; 1108 return ES_EXCEPTION; 1109 } 1110 1111 /* Clear out other reserved bits and set bit 10 */ 1112 val = (val & 0xffff23ffL) | BIT(10); 1113 1114 /* Early non-zero writes to DR7 are not supported */ 1115 if (!data && (val & ~DR7_RESET_VALUE)) 1116 return ES_UNSUPPORTED; 1117 1118 /* Using a value of 0 for ExitInfo1 means RAX holds the value */ 1119 ghcb_set_rax(ghcb, val); 1120 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); 1121 if (ret != ES_OK) 1122 return ret; 1123 1124 if (data) 1125 data->dr7 = val; 1126 1127 return ES_OK; 1128 } 1129 1130 static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, 1131 struct es_em_ctxt *ctxt) 1132 { 1133 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1134 long *reg = vc_insn_get_rm(ctxt); 1135 1136 if (!reg) 1137 return ES_DECODE_FAILED; 1138 1139 if (data) 1140 *reg = data->dr7; 1141 else 1142 *reg = DR7_RESET_VALUE; 1143 1144 return ES_OK; 1145 } 1146 1147 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, 1148 struct es_em_ctxt *ctxt) 1149 { 1150 return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); 1151 } 1152 1153 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1154 { 1155 enum es_result ret; 1156 1157 ghcb_set_rcx(ghcb, ctxt->regs->cx); 1158 1159 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); 1160 if (ret != ES_OK) 1161 return ret; 1162 1163 if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) 1164 return ES_VMM_ERROR; 1165 1166 ctxt->regs->ax = ghcb->save.rax; 1167 ctxt->regs->dx = ghcb->save.rdx; 1168 1169 return ES_OK; 1170 } 1171 1172 static enum es_result vc_handle_monitor(struct ghcb *ghcb, 1173 struct es_em_ctxt *ctxt) 1174 { 1175 /* 1176 * Treat it as a NOP and do not leak a physical address to the 1177 * hypervisor. 1178 */ 1179 return ES_OK; 1180 } 1181 1182 static enum es_result vc_handle_mwait(struct ghcb *ghcb, 1183 struct es_em_ctxt *ctxt) 1184 { 1185 /* Treat the same as MONITOR/MONITORX */ 1186 return ES_OK; 1187 } 1188 1189 static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, 1190 struct es_em_ctxt *ctxt) 1191 { 1192 enum es_result ret; 1193 1194 ghcb_set_rax(ghcb, ctxt->regs->ax); 1195 ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0); 1196 1197 if (x86_platform.hyper.sev_es_hcall_prepare) 1198 x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); 1199 1200 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); 1201 if (ret != ES_OK) 1202 return ret; 1203 1204 if (!ghcb_rax_is_valid(ghcb)) 1205 return ES_VMM_ERROR; 1206 1207 ctxt->regs->ax = ghcb->save.rax; 1208 1209 /* 1210 * Call sev_es_hcall_finish() after regs->ax is already set. 1211 * This allows the hypervisor handler to overwrite it again if 1212 * necessary. 1213 */ 1214 if (x86_platform.hyper.sev_es_hcall_finish && 1215 !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) 1216 return ES_VMM_ERROR; 1217 1218 return ES_OK; 1219 } 1220 1221 static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, 1222 struct es_em_ctxt *ctxt) 1223 { 1224 /* 1225 * Calling ecx_alignment_check() directly does not work, because it 1226 * enables IRQs and the GHCB is active. Forward the exception and call 1227 * it later from vc_forward_exception(). 1228 */ 1229 ctxt->fi.vector = X86_TRAP_AC; 1230 ctxt->fi.error_code = 0; 1231 return ES_EXCEPTION; 1232 } 1233 1234 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, 1235 struct ghcb *ghcb, 1236 unsigned long exit_code) 1237 { 1238 enum es_result result; 1239 1240 switch (exit_code) { 1241 case SVM_EXIT_READ_DR7: 1242 result = vc_handle_dr7_read(ghcb, ctxt); 1243 break; 1244 case SVM_EXIT_WRITE_DR7: 1245 result = vc_handle_dr7_write(ghcb, ctxt); 1246 break; 1247 case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: 1248 result = vc_handle_trap_ac(ghcb, ctxt); 1249 break; 1250 case SVM_EXIT_RDTSC: 1251 case SVM_EXIT_RDTSCP: 1252 result = vc_handle_rdtsc(ghcb, ctxt, exit_code); 1253 break; 1254 case SVM_EXIT_RDPMC: 1255 result = vc_handle_rdpmc(ghcb, ctxt); 1256 break; 1257 case SVM_EXIT_INVD: 1258 pr_err_ratelimited("#VC exception for INVD??? Seriously???\n"); 1259 result = ES_UNSUPPORTED; 1260 break; 1261 case SVM_EXIT_CPUID: 1262 result = vc_handle_cpuid(ghcb, ctxt); 1263 break; 1264 case SVM_EXIT_IOIO: 1265 result = vc_handle_ioio(ghcb, ctxt); 1266 break; 1267 case SVM_EXIT_MSR: 1268 result = vc_handle_msr(ghcb, ctxt); 1269 break; 1270 case SVM_EXIT_VMMCALL: 1271 result = vc_handle_vmmcall(ghcb, ctxt); 1272 break; 1273 case SVM_EXIT_WBINVD: 1274 result = vc_handle_wbinvd(ghcb, ctxt); 1275 break; 1276 case SVM_EXIT_MONITOR: 1277 result = vc_handle_monitor(ghcb, ctxt); 1278 break; 1279 case SVM_EXIT_MWAIT: 1280 result = vc_handle_mwait(ghcb, ctxt); 1281 break; 1282 case SVM_EXIT_NPF: 1283 result = vc_handle_mmio(ghcb, ctxt); 1284 break; 1285 default: 1286 /* 1287 * Unexpected #VC exception 1288 */ 1289 result = ES_UNSUPPORTED; 1290 } 1291 1292 return result; 1293 } 1294 1295 static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) 1296 { 1297 long error_code = ctxt->fi.error_code; 1298 int trapnr = ctxt->fi.vector; 1299 1300 ctxt->regs->orig_ax = ctxt->fi.error_code; 1301 1302 switch (trapnr) { 1303 case X86_TRAP_GP: 1304 exc_general_protection(ctxt->regs, error_code); 1305 break; 1306 case X86_TRAP_UD: 1307 exc_invalid_op(ctxt->regs); 1308 break; 1309 case X86_TRAP_PF: 1310 write_cr2(ctxt->fi.cr2); 1311 exc_page_fault(ctxt->regs, error_code); 1312 break; 1313 case X86_TRAP_AC: 1314 exc_alignment_check(ctxt->regs, error_code); 1315 break; 1316 default: 1317 pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); 1318 BUG(); 1319 } 1320 } 1321 1322 static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) 1323 { 1324 unsigned long sp = (unsigned long)regs; 1325 1326 return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); 1327 } 1328 1329 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) 1330 { 1331 struct ghcb_state state; 1332 struct es_em_ctxt ctxt; 1333 enum es_result result; 1334 struct ghcb *ghcb; 1335 bool ret = true; 1336 1337 ghcb = __sev_get_ghcb(&state); 1338 1339 vc_ghcb_invalidate(ghcb); 1340 result = vc_init_em_ctxt(&ctxt, regs, error_code); 1341 1342 if (result == ES_OK) 1343 result = vc_handle_exitcode(&ctxt, ghcb, error_code); 1344 1345 __sev_put_ghcb(&state); 1346 1347 /* Done - now check the result */ 1348 switch (result) { 1349 case ES_OK: 1350 vc_finish_insn(&ctxt); 1351 break; 1352 case ES_UNSUPPORTED: 1353 pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", 1354 error_code, regs->ip); 1355 ret = false; 1356 break; 1357 case ES_VMM_ERROR: 1358 pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 1359 error_code, regs->ip); 1360 ret = false; 1361 break; 1362 case ES_DECODE_FAILED: 1363 pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 1364 error_code, regs->ip); 1365 ret = false; 1366 break; 1367 case ES_EXCEPTION: 1368 vc_forward_exception(&ctxt); 1369 break; 1370 case ES_RETRY: 1371 /* Nothing to do */ 1372 break; 1373 default: 1374 pr_emerg("Unknown result in %s():%d\n", __func__, result); 1375 /* 1376 * Emulating the instruction which caused the #VC exception 1377 * failed - can't continue so print debug information 1378 */ 1379 BUG(); 1380 } 1381 1382 return ret; 1383 } 1384 1385 static __always_inline bool vc_is_db(unsigned long error_code) 1386 { 1387 return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; 1388 } 1389 1390 /* 1391 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode 1392 * and will panic when an error happens. 1393 */ 1394 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) 1395 { 1396 irqentry_state_t irq_state; 1397 1398 /* 1399 * With the current implementation it is always possible to switch to a 1400 * safe stack because #VC exceptions only happen at known places, like 1401 * intercepted instructions or accesses to MMIO areas/IO ports. They can 1402 * also happen with code instrumentation when the hypervisor intercepts 1403 * #DB, but the critical paths are forbidden to be instrumented, so #DB 1404 * exceptions currently also only happen in safe places. 1405 * 1406 * But keep this here in case the noinstr annotations are violated due 1407 * to bug elsewhere. 1408 */ 1409 if (unlikely(on_vc_fallback_stack(regs))) { 1410 instrumentation_begin(); 1411 panic("Can't handle #VC exception from unsupported context\n"); 1412 instrumentation_end(); 1413 } 1414 1415 /* 1416 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 1417 */ 1418 if (vc_is_db(error_code)) { 1419 exc_debug(regs); 1420 return; 1421 } 1422 1423 irq_state = irqentry_nmi_enter(regs); 1424 1425 instrumentation_begin(); 1426 1427 if (!vc_raw_handle_exception(regs, error_code)) { 1428 /* Show some debug info */ 1429 show_regs(regs); 1430 1431 /* Ask hypervisor to sev_es_terminate */ 1432 sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); 1433 1434 /* If that fails and we get here - just panic */ 1435 panic("Returned from Terminate-Request to Hypervisor\n"); 1436 } 1437 1438 instrumentation_end(); 1439 irqentry_nmi_exit(regs, irq_state); 1440 } 1441 1442 /* 1443 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode 1444 * and will kill the current task with SIGBUS when an error happens. 1445 */ 1446 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) 1447 { 1448 /* 1449 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 1450 */ 1451 if (vc_is_db(error_code)) { 1452 noist_exc_debug(regs); 1453 return; 1454 } 1455 1456 irqentry_enter_from_user_mode(regs); 1457 instrumentation_begin(); 1458 1459 if (!vc_raw_handle_exception(regs, error_code)) { 1460 /* 1461 * Do not kill the machine if user-space triggered the 1462 * exception. Send SIGBUS instead and let user-space deal with 1463 * it. 1464 */ 1465 force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); 1466 } 1467 1468 instrumentation_end(); 1469 irqentry_exit_to_user_mode(regs); 1470 } 1471 1472 bool __init handle_vc_boot_ghcb(struct pt_regs *regs) 1473 { 1474 unsigned long exit_code = regs->orig_ax; 1475 struct es_em_ctxt ctxt; 1476 enum es_result result; 1477 1478 /* Do initial setup or terminate the guest */ 1479 if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) 1480 sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); 1481 1482 vc_ghcb_invalidate(boot_ghcb); 1483 1484 result = vc_init_em_ctxt(&ctxt, regs, exit_code); 1485 if (result == ES_OK) 1486 result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); 1487 1488 /* Done - now check the result */ 1489 switch (result) { 1490 case ES_OK: 1491 vc_finish_insn(&ctxt); 1492 break; 1493 case ES_UNSUPPORTED: 1494 early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", 1495 exit_code, regs->ip); 1496 goto fail; 1497 case ES_VMM_ERROR: 1498 early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 1499 exit_code, regs->ip); 1500 goto fail; 1501 case ES_DECODE_FAILED: 1502 early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 1503 exit_code, regs->ip); 1504 goto fail; 1505 case ES_EXCEPTION: 1506 vc_early_forward_exception(&ctxt); 1507 break; 1508 case ES_RETRY: 1509 /* Nothing to do */ 1510 break; 1511 default: 1512 BUG(); 1513 } 1514 1515 return true; 1516 1517 fail: 1518 show_regs(regs); 1519 1520 while (true) 1521 halt(); 1522 } 1523