// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt) "SEV-ES: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/mem_encrypt.h>
#include <linux/lockdep.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>

#define DR7_RESET_VALUE        0x400

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb __initdata *boot_ghcb;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
        struct ghcb ghcb_page;

        /* Physical storage for the per-CPU IST stack of the #VC handler */
        char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

        /*
         * Physical storage for the per-CPU fall-back stack of the #VC handler.
         * The fall-back stack is used when it is not safe to switch back to the
         * interrupted stack in the #VC entry code.
         */
        char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);

        /*
         * Reserve one page per CPU as backup storage for the unencrypted GHCB.
         * It is needed when an NMI happens while the #VC handler uses the real
         * GHCB, and the NMI handler itself is causing another #VC exception. In
         * that case the GHCB content of the first handler needs to be backed up
         * and restored.
         */
        struct ghcb backup_ghcb;

        /*
         * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
         * There is no need for it to be atomic, because nothing is written to
         * the GHCB between the read and the write of ghcb_active. So it is safe
         * to use it when a nested #VC exception happens before the write.
         *
         * This is necessary for example in the #VC->NMI->#VC case when the NMI
         * happens while the first #VC handler uses the GHCB. When the NMI code
         * raises a second #VC handler it might overwrite the contents of the
         * GHCB written by the first handler. To avoid this the content of the
         * GHCB is saved and restored when the GHCB is detected to be in use
         * already.
         */
        bool ghcb_active;
        bool backup_ghcb_active;

        /*
         * Cached DR7 value - write it on DR7 writes and return it on reads.
         * That value will never make it to the real hardware DR7 as debugging
         * is currently unsupported in SEV-ES guests.
         */
        unsigned long dr7;
};

struct ghcb_state {
        struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data *, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

static void __init setup_vc_stacks(int cpu)
{
        struct sev_es_runtime_data *data;
        struct cpu_entry_area *cea;
        unsigned long vaddr;
        phys_addr_t pa;

        data = per_cpu(runtime_data, cpu);
        cea = get_cpu_entry_area(cpu);

        /* Map #VC IST stack */
        vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
        pa = __pa(data->ist_stack);
        cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);

        /* Map VC fall-back stack */
        vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
        pa = __pa(data->fallback_stack);
        cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
}

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
        unsigned long sp = regs->sp;

        /* User-mode RSP is not trusted */
        if (user_mode(regs))
                return false;

        /* SYSCALL gap still has user-mode RSP */
        if (ip_within_syscall_gap(regs))
                return false;

        return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
        unsigned long old_ist, new_ist;

        /* Read old IST entry */
        new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

        /*
         * If NMI happened while on the #VC IST stack, set the new IST
         * value below regs->sp, so that the interrupted stack frame is
         * not overwritten by subsequent #VC exceptions.
         */
        if (on_vc_stack(regs))
                new_ist = regs->sp;

        /*
         * Reserve additional 8 bytes and store old IST value so this
         * adjustment can be unrolled in __sev_es_ist_exit().
         */
        new_ist -= sizeof(old_ist);
        *(unsigned long *)new_ist = old_ist;

        /* Set new IST entry */
        this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
        unsigned long ist;

        /* Read IST entry */
        ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

        if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
                return;

        /* Read back old IST entry and write it to the TSS */
        this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

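/*
 * Return the per-CPU GHCB for use by the caller. If the GHCB is already
 * active (nested #VC, e.g. raised from NMI context), its contents are
 * saved to the backup GHCB recorded in the passed ghcb_state so that
 * sev_es_put_ghcb() can restore them later.
 */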
static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
{
        struct sev_es_runtime_data *data;
        struct ghcb *ghcb;

        data = this_cpu_read(runtime_data);
        ghcb = &data->ghcb_page;

        if (unlikely(data->ghcb_active)) {
                /* GHCB is already in use - save its contents */

                if (unlikely(data->backup_ghcb_active)) {
                        /*
                         * Backup-GHCB is also already in use. There is no way
                         * to continue here so just kill the machine. To make
                         * panic() work, mark GHCBs inactive so that messages
                         * can be printed out.
                         */
                        data->ghcb_active = false;
                        data->backup_ghcb_active = false;

                        panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
                }

                /* Mark backup_ghcb active before writing to it */
                data->backup_ghcb_active = true;

                state->ghcb = &data->backup_ghcb;

                /* Backup GHCB content */
                *state->ghcb = *ghcb;
        } else {
                state->ghcb = NULL;
                data->ghcb_active = true;
        }

        return ghcb;
}

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static inline u64 sev_es_rd_ghcb_msr(void)
{
        return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
        u32 low, high;

        low = (u32)(val);
        high = (u32)(val >> 32);

        native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
                                unsigned char *buffer)
{
        return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
        char buffer[MAX_INSN_SIZE];
        int res;

        res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
        if (!res) {
                ctxt->fi.vector = X86_TRAP_PF;
                ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
                ctxt->fi.cr2 = ctxt->regs->ip;
                return ES_EXCEPTION;
        }

        if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
                return ES_DECODE_FAILED;

        if (ctxt->insn.immediate.got)
                return ES_OK;
        else
                return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
        char buffer[MAX_INSN_SIZE];
        int res, ret;

        res = vc_fetch_insn_kernel(ctxt, buffer);
        if (res) {
                ctxt->fi.vector = X86_TRAP_PF;
                ctxt->fi.error_code = X86_PF_INSTR;
                ctxt->fi.cr2 = ctxt->regs->ip;
                return ES_EXCEPTION;
        }

        ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
        if (ret < 0)
                return ES_DECODE_FAILED;
        else
                return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
        if (user_mode(ctxt->regs))
                return __vc_decode_user_insn(ctxt);
        else
                return __vc_decode_kern_insn(ctxt);
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
                                   char *dst, char *buf, size_t size)
{
        unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
        char __user *target = (char __user *)dst;
        u64 d8;
        u32 d4;
        u16 d2;
        u8 d1;

        /*
         * This function uses __put_user() independent of whether kernel or user
         * memory is accessed. This works fine because __put_user() does no
         * sanity checks of the pointer being accessed. All that it does is
         * to report when the access failed.
         *
         * Also, this function runs in atomic context, so __put_user() is not
         * allowed to sleep. The page-fault handler detects that it is running
         * in atomic context and will not try to take mmap_sem and handle the
         * fault, so additional pagefault_enable()/disable() calls are not
         * needed.
         *
         * The access can't be done via copy_to_user() here because
         * vc_write_mem() must not use string instructions to access unsafe
         * memory. The reason is that MOVS is emulated by the #VC handler by
         * splitting the move up into a read and a write and taking a nested #VC
         * exception on whatever of them is the MMIO access. Using string
         * instructions here would cause infinite nesting.
         */
        switch (size) {
        case 1:
                memcpy(&d1, buf, 1);
                if (__put_user(d1, target))
                        goto fault;
                break;
        case 2:
                memcpy(&d2, buf, 2);
                if (__put_user(d2, target))
                        goto fault;
                break;
        case 4:
                memcpy(&d4, buf, 4);
                if (__put_user(d4, target))
                        goto fault;
                break;
        case 8:
                memcpy(&d8, buf, 8);
                if (__put_user(d8, target))
                        goto fault;
                break;
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
        }

        return ES_OK;

fault:
        if (user_mode(ctxt->regs))
                error_code |= X86_PF_USER;

        ctxt->fi.vector = X86_TRAP_PF;
        ctxt->fi.error_code = error_code;
        ctxt->fi.cr2 = (unsigned long)dst;

        return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
                                  char *src, char *buf, size_t size)
{
        unsigned long error_code = X86_PF_PROT;
        char __user *s = (char __user *)src;
        u64 d8;
        u32 d4;
        u16 d2;
        u8 d1;

        /*
         * This function uses __get_user() independent of whether kernel or user
         * memory is accessed. This works fine because __get_user() does no
         * sanity checks of the pointer being accessed. All that it does is
         * to report when the access failed.
         *
         * Also, this function runs in atomic context, so __get_user() is not
         * allowed to sleep. The page-fault handler detects that it is running
         * in atomic context and will not try to take mmap_sem and handle the
         * fault, so additional pagefault_enable()/disable() calls are not
         * needed.
         *
         * The access can't be done via copy_from_user() here because
         * vc_read_mem() must not use string instructions to access unsafe
         * memory. The reason is that MOVS is emulated by the #VC handler by
         * splitting the move up into a read and a write and taking a nested #VC
         * exception on whatever of them is the MMIO access. Using string
         * instructions here would cause infinite nesting.
         */
        switch (size) {
        case 1:
                if (__get_user(d1, s))
                        goto fault;
                memcpy(buf, &d1, 1);
                break;
        case 2:
                if (__get_user(d2, s))
                        goto fault;
                memcpy(buf, &d2, 2);
                break;
        case 4:
                if (__get_user(d4, s))
                        goto fault;
                memcpy(buf, &d4, 4);
                break;
        case 8:
                if (__get_user(d8, s))
                        goto fault;
                memcpy(buf, &d8, 8);
                break;
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
        }

        return ES_OK;

fault:
        if (user_mode(ctxt->regs))
                error_code |= X86_PF_USER;

        ctxt->fi.vector = X86_TRAP_PF;
        ctxt->fi.error_code = error_code;
        ctxt->fi.cr2 = (unsigned long)src;

        return ES_EXCEPTION;
}

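/*
 * Translate the faulting virtual address to a physical address by walking
 * the page-table hierarchy referenced by CR3. Emulated MMIO to or from
 * encrypted memory is not supported and is rejected here.
 */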
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
                                           unsigned long vaddr, phys_addr_t *paddr)
{
        unsigned long va = (unsigned long)vaddr;
        unsigned int level;
        phys_addr_t pa;
        pgd_t *pgd;
        pte_t *pte;

        pgd = __va(read_cr3_pa());
        pgd = &pgd[pgd_index(va)];
        pte = lookup_address_in_pgd(pgd, va, &level);
        if (!pte) {
                ctxt->fi.vector = X86_TRAP_PF;
                ctxt->fi.cr2 = vaddr;
                ctxt->fi.error_code = 0;

                if (user_mode(ctxt->regs))
                        ctxt->fi.error_code |= X86_PF_USER;

                return ES_EXCEPTION;
        }

        if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
                /* Emulated MMIO to/from encrypted memory not supported */
                return ES_UNSUPPORTED;

        pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
        pa |= va & ~page_level_mask(level);

        *paddr = pa;

        return ES_OK;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"

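/*
 * Release the GHCB obtained from sev_es_get_ghcb(). If a backup was made
 * for a nested #VC, restore it; otherwise invalidate the GHCB and mark it
 * as no longer active.
 */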
static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
{
        struct sev_es_runtime_data *data;
        struct ghcb *ghcb;

        data = this_cpu_read(runtime_data);
        ghcb = &data->ghcb_page;

        if (state->ghcb) {
                /* Restore GHCB from Backup */
                *ghcb = *state->ghcb;
                data->backup_ghcb_active = false;
                state->ghcb = NULL;
        } else {
                /*
                 * Invalidate the GHCB so a VMGEXIT instruction issued
                 * from userspace won't appear to be valid.
                 */
                vc_ghcb_invalidate(ghcb);
                data->ghcb_active = false;
        }
}

void noinstr __sev_es_nmi_complete(void)
{
        struct ghcb_state state;
        struct ghcb *ghcb;

        ghcb = sev_es_get_ghcb(&state);

        vc_ghcb_invalidate(ghcb);
        ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
        ghcb_set_sw_exit_info_1(ghcb, 0);
        ghcb_set_sw_exit_info_2(ghcb, 0);

        sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
        VMGEXIT();

        sev_es_put_ghcb(&state);
}

static u64 get_jump_table_addr(void)
{
        struct ghcb_state state;
        unsigned long flags;
        struct ghcb *ghcb;
        u64 ret = 0;

        local_irq_save(flags);

        ghcb = sev_es_get_ghcb(&state);

        vc_ghcb_invalidate(ghcb);
        ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
        ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
        ghcb_set_sw_exit_info_2(ghcb, 0);

        sev_es_wr_ghcb_msr(__pa(ghcb));
        VMGEXIT();

        if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
            ghcb_sw_exit_info_2_is_valid(ghcb))
                ret = ghcb->save.sw_exit_info_2;

        sev_es_put_ghcb(&state);

        local_irq_restore(flags);

        return ret;
}

int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
        u16 startup_cs, startup_ip;
        phys_addr_t jump_table_pa;
        u64 jump_table_addr;
        u16 __iomem *jump_table;

        jump_table_addr = get_jump_table_addr();

        /* On UP guests there is no jump table so this is not a failure */
        if (!jump_table_addr)
                return 0;

        /* Check if AP Jump Table is page-aligned */
        if (jump_table_addr & ~PAGE_MASK)
                return -EINVAL;

        jump_table_pa = jump_table_addr & PAGE_MASK;

        startup_cs = (u16)(rmh->trampoline_start >> 4);
        startup_ip = (u16)(rmh->sev_es_trampoline_start -
                           rmh->trampoline_start);

        jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
        if (!jump_table)
                return -EIO;

        writew(startup_ip, &jump_table[0]);
        writew(startup_cs, &jump_table[1]);

        iounmap(jump_table);

        return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
        struct sev_es_runtime_data *data;
        unsigned long address, pflags;
        int cpu;
        u64 pfn;

        if (!sev_es_active())
                return 0;

        pflags = _PAGE_NX | _PAGE_RW;

        for_each_possible_cpu(cpu) {
                data = per_cpu(runtime_data, cpu);

                address = __pa(&data->ghcb_page);
                pfn = address >> PAGE_SHIFT;

                if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
                        return 1;
        }

        return 0;
}

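/*
 * Emulate RDMSR/WRMSR: opcode 0F 30 is WRMSR, 0F 32 is RDMSR. The MSR index
 * is taken from RCX; for WRMSR the value is passed to the hypervisor in
 * RDX:RAX, for RDMSR it is returned there.
 */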
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
        struct pt_regs *regs = ctxt->regs;
        enum es_result ret;
        u64 exit_info_1;

        /* Is it a WRMSR? */
        exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;

        ghcb_set_rcx(ghcb, regs->cx);
        if (exit_info_1) {
                ghcb_set_rax(ghcb, regs->ax);
                ghcb_set_rdx(ghcb, regs->dx);
        }

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);

        if ((ret == ES_OK) && (!exit_info_1)) {
                regs->ax = ghcb->save.rax;
                regs->dx = ghcb->save.rdx;
        }

        return ret;
}

/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 */
static bool __init sev_es_setup_ghcb(void)
{
        /* First make sure the hypervisor talks a supported protocol. */
        if (!sev_es_negotiate_protocol())
                return false;

        /*
         * Clear the boot_ghcb. The first exception comes in before the bss
         * section is cleared.
         */
        memset(&boot_ghcb_page, 0, PAGE_SIZE);

        /* Alright - Make the boot-ghcb public */
        boot_ghcb = &boot_ghcb_page;

        return true;
}

#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
        struct ghcb_state state;
        struct ghcb *ghcb;

        ghcb = sev_es_get_ghcb(&state);

        while (true) {
                vc_ghcb_invalidate(ghcb);
                ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
                ghcb_set_sw_exit_info_1(ghcb, 0);
                ghcb_set_sw_exit_info_2(ghcb, 0);

                sev_es_wr_ghcb_msr(__pa(ghcb));
                VMGEXIT();

                /* Wakeup signal? */
                if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
                    ghcb->save.sw_exit_info_2)
                        break;
        }

        sev_es_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
        play_dead_common();

        /* IRQs now disabled */

        sev_es_ap_hlt_loop();

        /*
         * If we get here, the VCPU was woken up again. Jump to CPU
         * startup code to get it back online.
         */
        start_cpu0();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
        smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
        struct sev_es_runtime_data *data;

        data = memblock_alloc(sizeof(*data), PAGE_SIZE);
        if (!data)
                panic("Can't allocate SEV-ES runtime data");

        per_cpu(runtime_data, cpu) = data;
}

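/*
 * Map the per-CPU GHCB page decrypted so it can be shared with the
 * hypervisor, and clear it before first use.
 */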
static void __init init_ghcb(int cpu)
{
        struct sev_es_runtime_data *data;
        int err;

        data = per_cpu(runtime_data, cpu);

        err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
                                         sizeof(data->ghcb_page));
        if (err)
                panic("Can't map GHCBs unencrypted");

        memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

        data->ghcb_active = false;
        data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
        int cpu;

        BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

        if (!sev_es_active())
                return;

        if (!sev_es_check_cpu_features())
                panic("SEV-ES CPU Features missing");

        /* Enable SEV-ES special handling */
        static_branch_enable(&sev_es_enable_key);

        /* Initialize per-cpu GHCB pages */
        for_each_possible_cpu(cpu) {
                alloc_runtime_data(cpu);
                init_ghcb(cpu);
                setup_vc_stacks(cpu);
        }

        sev_es_setup_play_dead();

        /* Secondary CPUs use the runtime #VC handler */
        initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
        int trapnr = ctxt->fi.vector;

        if (trapnr == X86_TRAP_PF)
                native_write_cr2(ctxt->fi.cr2);

        ctxt->regs->orig_ax = ctxt->fi.error_code;
        do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
{
        long *reg_array;
        int offset;

        reg_array = (long *)ctxt->regs;
        offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);

        if (offset < 0)
                return NULL;

        offset /= sizeof(long);

        return reg_array + offset;
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
        long *reg_array;
        int offset;

        reg_array = (long *)ctxt->regs;
        offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

        if (offset < 0)
                return NULL;

        offset /= sizeof(long);

        return reg_array + offset;
}

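/*
 * Emulate a single MMIO access of @bytes bytes through the hypervisor. The
 * data is transferred via the shared_buffer of the GHCB, which the
 * sw_scratch field is pointed at.
 */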
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
                                 unsigned int bytes, bool read)
{
        u64 exit_code, exit_info_1, exit_info_2;
        unsigned long ghcb_pa = __pa(ghcb);
        enum es_result res;
        phys_addr_t paddr;
        void __user *ref;

        ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
        if (ref == (void __user *)-1L)
                return ES_UNSUPPORTED;

        exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

        res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
        if (res != ES_OK) {
                if (res == ES_EXCEPTION && !read)
                        ctxt->fi.error_code |= X86_PF_WRITE;

                return res;
        }

        exit_info_1 = paddr;
        /* Can never be greater than 8 */
        exit_info_2 = bytes;

        ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

        return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
                                                 struct es_em_ctxt *ctxt)
{
        struct insn *insn = &ctxt->insn;
        unsigned int bytes = 0;
        enum es_result ret;
        int sign_byte;
        long *reg_data;

        switch (insn->opcode.bytes[1]) {
        /* MMIO Read w/ zero-extension */
        case 0xb6:
                bytes = 1;
                fallthrough;
        case 0xb7:
                if (!bytes)
                        bytes = 2;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                /* Zero extend based on operand size */
                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                memset(reg_data, 0, insn->opnd_bytes);

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        /* MMIO Read w/ sign-extension */
        case 0xbe:
                bytes = 1;
                fallthrough;
        case 0xbf:
                if (!bytes)
                        bytes = 2;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                /* Sign extend based on operand size */
                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                if (bytes == 1) {
                        u8 *val = (u8 *)ghcb->shared_buffer;

                        sign_byte = (*val & 0x80) ? 0xff : 0x00;
                } else {
                        u16 *val = (u16 *)ghcb->shared_buffer;

                        sign_byte = (*val & 0x8000) ? 0xff : 0x00;
                }
                memset(reg_data, sign_byte, insn->opnd_bytes);

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        default:
                ret = ES_UNSUPPORTED;
        }

        return ret;
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
                                          unsigned int bytes)
{
        unsigned long ds_base, es_base;
        unsigned char *src, *dst;
        unsigned char buffer[8];
        enum es_result ret;
        bool rep;
        int off;

        ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
        es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

        if (ds_base == -1L || es_base == -1L) {
                ctxt->fi.vector = X86_TRAP_GP;
                ctxt->fi.error_code = 0;
                return ES_EXCEPTION;
        }

        src = ds_base + (unsigned char *)ctxt->regs->si;
        dst = es_base + (unsigned char *)ctxt->regs->di;

        ret = vc_read_mem(ctxt, src, buffer, bytes);
        if (ret != ES_OK)
                return ret;

        ret = vc_write_mem(ctxt, dst, buffer, bytes);
        if (ret != ES_OK)
                return ret;

        if (ctxt->regs->flags & X86_EFLAGS_DF)
                off = -bytes;
        else
                off = bytes;

        ctxt->regs->si += off;
        ctxt->regs->di += off;

        rep = insn_has_rep_prefix(&ctxt->insn);
        if (rep)
                ctxt->regs->cx -= 1;

        if (!rep || ctxt->regs->cx == 0)
                return ES_OK;
        else
                return ES_RETRY;
}

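/*
 * Decode the instruction that caused the MMIO #VC exception and dispatch on
 * its first opcode byte: plain MOV forms, MOVS, and the two-byte (0x0f)
 * MOVZX/MOVSX forms are supported.
 */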
static enum es_result vc_handle_mmio(struct ghcb *ghcb,
                                     struct es_em_ctxt *ctxt)
{
        struct insn *insn = &ctxt->insn;
        unsigned int bytes = 0;
        enum es_result ret;
        long *reg_data;

        switch (insn->opcode.bytes[0]) {
        /* MMIO Write */
        case 0x88:
                bytes = 1;
                fallthrough;
        case 0x89:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                memcpy(ghcb->shared_buffer, reg_data, bytes);

                ret = vc_do_mmio(ghcb, ctxt, bytes, false);
                break;

        case 0xc6:
                bytes = 1;
                fallthrough;
        case 0xc7:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);

                ret = vc_do_mmio(ghcb, ctxt, bytes, false);
                break;

        /* MMIO Read */
        case 0x8a:
                bytes = 1;
                fallthrough;
        case 0x8b:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                ret = vc_do_mmio(ghcb, ctxt, bytes, true);
                if (ret)
                        break;

                reg_data = vc_insn_get_reg(ctxt);
                if (!reg_data)
                        return ES_DECODE_FAILED;

                /* Zero-extend for 32-bit operation */
                if (bytes == 4)
                        *reg_data = 0;

                memcpy(reg_data, ghcb->shared_buffer, bytes);
                break;

        /* MOVS instruction */
        case 0xa4:
                bytes = 1;
                fallthrough;
        case 0xa5:
                if (!bytes)
                        bytes = insn->opnd_bytes;

                ret = vc_handle_mmio_movs(ctxt, bytes);
                break;
        /* Two-Byte Opcodes */
        case 0x0f:
                ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
                break;
        default:
                ret = ES_UNSUPPORTED;
        }

        return ret;
}

static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
                                          struct es_em_ctxt *ctxt)
{
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        long val, *reg = vc_insn_get_rm(ctxt);
        enum es_result ret;

        if (!reg)
                return ES_DECODE_FAILED;

        val = *reg;

        /* Upper 32 bits must be written as zeroes */
        if (val >> 32) {
                ctxt->fi.vector = X86_TRAP_GP;
                ctxt->fi.error_code = 0;
                return ES_EXCEPTION;
        }

        /* Clear out other reserved bits and set bit 10 */
        val = (val & 0xffff23ffL) | BIT(10);

        /* Early non-zero writes to DR7 are not supported */
        if (!data && (val & ~DR7_RESET_VALUE))
                return ES_UNSUPPORTED;

        /* Using a value of 0 for ExitInfo1 means RAX holds the value */
        ghcb_set_rax(ghcb, val);
        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (data)
                data->dr7 = val;

        return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
                                         struct es_em_ctxt *ctxt)
{
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        long *reg = vc_insn_get_rm(ctxt);

        if (!reg)
                return ES_DECODE_FAILED;

        if (data)
                *reg = data->dr7;
        else
                *reg = DR7_RESET_VALUE;

        return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
                                       struct es_em_ctxt *ctxt)
{
        return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
        enum es_result ret;

        ghcb_set_rcx(ghcb, ctxt->regs->cx);

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
                return ES_VMM_ERROR;

        ctxt->regs->ax = ghcb->save.rax;
        ctxt->regs->dx = ghcb->save.rdx;

        return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        /*
         * Treat it as a NOP and do not leak a physical address to the
         * hypervisor.
         */
        return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
                                      struct es_em_ctxt *ctxt)
{
        /* Treat the same as MONITOR/MONITORX */
        return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        enum es_result ret;

        ghcb_set_rax(ghcb, ctxt->regs->ax);
        ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

        if (x86_platform.hyper.sev_es_hcall_prepare)
                x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
        if (ret != ES_OK)
                return ret;

        if (!ghcb_rax_is_valid(ghcb))
                return ES_VMM_ERROR;

        ctxt->regs->ax = ghcb->save.rax;

        /*
         * Call sev_es_hcall_finish() after regs->ax is already set.
         * This allows the hypervisor handler to overwrite it again if
         * necessary.
         */
        if (x86_platform.hyper.sev_es_hcall_finish &&
            !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
                return ES_VMM_ERROR;

        return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
                                        struct es_em_ctxt *ctxt)
{
        /*
         * Calling exc_alignment_check() directly does not work, because it
         * enables IRQs and the GHCB is active. Forward the exception and call
         * it later from vc_forward_exception().
         */
        ctxt->fi.vector = X86_TRAP_AC;
        ctxt->fi.error_code = 0;
        return ES_EXCEPTION;
}

static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
{
        if (user_mode(regs))
                noist_exc_debug(regs);
        else
                exc_debug(regs);
}

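/*
 * Dispatch the intercepted exit-code reported in the #VC error code to the
 * matching emulation handler.
 */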
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
                                         struct ghcb *ghcb,
                                         unsigned long exit_code)
{
        enum es_result result;

        switch (exit_code) {
        case SVM_EXIT_READ_DR7:
                result = vc_handle_dr7_read(ghcb, ctxt);
                break;
        case SVM_EXIT_WRITE_DR7:
                result = vc_handle_dr7_write(ghcb, ctxt);
                break;
        case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
                result = vc_handle_trap_ac(ghcb, ctxt);
                break;
        case SVM_EXIT_RDTSC:
        case SVM_EXIT_RDTSCP:
                result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
                break;
        case SVM_EXIT_RDPMC:
                result = vc_handle_rdpmc(ghcb, ctxt);
                break;
        case SVM_EXIT_INVD:
                pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
                result = ES_UNSUPPORTED;
                break;
        case SVM_EXIT_CPUID:
                result = vc_handle_cpuid(ghcb, ctxt);
                break;
        case SVM_EXIT_IOIO:
                result = vc_handle_ioio(ghcb, ctxt);
                break;
        case SVM_EXIT_MSR:
                result = vc_handle_msr(ghcb, ctxt);
                break;
        case SVM_EXIT_VMMCALL:
                result = vc_handle_vmmcall(ghcb, ctxt);
                break;
        case SVM_EXIT_WBINVD:
                result = vc_handle_wbinvd(ghcb, ctxt);
                break;
        case SVM_EXIT_MONITOR:
                result = vc_handle_monitor(ghcb, ctxt);
                break;
        case SVM_EXIT_MWAIT:
                result = vc_handle_mwait(ghcb, ctxt);
                break;
        case SVM_EXIT_NPF:
                result = vc_handle_mmio(ghcb, ctxt);
                break;
        default:
                /*
                 * Unexpected #VC exception
                 */
                result = ES_UNSUPPORTED;
        }

        return result;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
        long error_code = ctxt->fi.error_code;
        int trapnr = ctxt->fi.vector;

        ctxt->regs->orig_ax = ctxt->fi.error_code;

        switch (trapnr) {
        case X86_TRAP_GP:
                exc_general_protection(ctxt->regs, error_code);
                break;
        case X86_TRAP_UD:
                exc_invalid_op(ctxt->regs);
                break;
        case X86_TRAP_PF:
                write_cr2(ctxt->fi.cr2);
                exc_page_fault(ctxt->regs, error_code);
                break;
        case X86_TRAP_AC:
                exc_alignment_check(ctxt->regs, error_code);
                break;
        default:
                pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
                BUG();
        }
}

static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
{
        unsigned long sp = (unsigned long)regs;

        return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

/*
 * Main #VC exception handler. It is called when the entry code was able to
 * switch off the IST to a safe kernel stack.
 *
 * With the current implementation it is always possible to switch to a safe
 * stack because #VC exceptions only happen at known places, like intercepted
 * instructions or accesses to MMIO areas/IO ports. They can also happen with
 * code instrumentation when the hypervisor intercepts #DB, but the critical
 * paths are forbidden to be instrumented, so #DB exceptions currently also
 * only happen in safe places.
 */
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
        irqentry_state_t irq_state;
        struct ghcb_state state;
        struct es_em_ctxt ctxt;
        enum es_result result;
        struct ghcb *ghcb;

        /*
         * Handle #DB before calling into !noinstr code to avoid recursive #DB.
         */
        if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
                vc_handle_trap_db(regs);
                return;
        }

        irq_state = irqentry_nmi_enter(regs);
        lockdep_assert_irqs_disabled();
        instrumentation_begin();

        /*
         * This is invoked through an interrupt gate, so IRQs are disabled. The
         * code below might walk page-tables for user or kernel addresses, so
         * keep the IRQs disabled to protect us against concurrent TLB flushes.
         */

        ghcb = sev_es_get_ghcb(&state);

        vc_ghcb_invalidate(ghcb);
        result = vc_init_em_ctxt(&ctxt, regs, error_code);

        if (result == ES_OK)
                result = vc_handle_exitcode(&ctxt, ghcb, error_code);

        sev_es_put_ghcb(&state);

        /* Done - now check the result */
        switch (result) {
        case ES_OK:
                vc_finish_insn(&ctxt);
                break;
        case ES_UNSUPPORTED:
                pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
                                   error_code, regs->ip);
                goto fail;
        case ES_VMM_ERROR:
                pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
                                   error_code, regs->ip);
                goto fail;
        case ES_DECODE_FAILED:
                pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
                                   error_code, regs->ip);
                goto fail;
        case ES_EXCEPTION:
                vc_forward_exception(&ctxt);
                break;
        case ES_RETRY:
                /* Nothing to do */
                break;
        default:
                pr_emerg("Unknown result in %s():%d\n", __func__, result);
                /*
                 * Emulating the instruction which caused the #VC exception
                 * failed - can't continue so print debug information
                 */
                BUG();
        }

out:
        instrumentation_end();
        irqentry_nmi_exit(regs, irq_state);

        return;

fail:
        if (user_mode(regs)) {
                /*
                 * Do not kill the machine if user-space triggered the
                 * exception. Send SIGBUS instead and let user-space deal with
                 * it.
                 */
                force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
        } else {
                pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
                         result);

                /* Show some debug info */
                show_regs(regs);

                /* Ask hypervisor to sev_es_terminate */
                sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

                /* If that fails and we get here - just panic */
                panic("Returned from Terminate-Request to Hypervisor\n");
        }

        goto out;
}

/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
{
        instrumentation_begin();
        panic("Can't handle #VC exception from unsupported context\n");
        instrumentation_end();
}

DEFINE_IDTENTRY_VC(exc_vmm_communication)
{
        if (likely(!on_vc_fallback_stack(regs)))
                safe_stack_exc_vmm_communication(regs, error_code);
        else
                ist_exc_vmm_communication(regs, error_code);
}

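/*
 * Early boot #VC exception handler, used before the runtime per-CPU GHCBs
 * are set up; it communicates with the hypervisor through the statically
 * allocated boot GHCB.
 */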
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
        unsigned long exit_code = regs->orig_ax;
        struct es_em_ctxt ctxt;
        enum es_result result;

        /* Do initial setup or terminate the guest */
        if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
                sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

        vc_ghcb_invalidate(boot_ghcb);

        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
        if (result == ES_OK)
                result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

        /* Done - now check the result */
        switch (result) {
        case ES_OK:
                vc_finish_insn(&ctxt);
                break;
        case ES_UNSUPPORTED:
                early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_VMM_ERROR:
                early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_DECODE_FAILED:
                early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
                             exit_code, regs->ip);
                goto fail;
        case ES_EXCEPTION:
                vc_early_forward_exception(&ctxt);
                break;
        case ES_RETRY:
                /* Nothing to do */
                break;
        default:
                BUG();
        }

        return true;

fail:
        show_regs(regs);

        while (true)
                halt();
}