// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt) "SEV: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>

#define DR7_RESET_VALUE 0x400

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb __initdata *boot_ghcb;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
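 *
 * Illustration of the arithmetic done below: if the NMI hit while RSP
 * was still on the #VC IST stack, the IST entry is first lowered to
 * regs->sp, then 8 more bytes are reserved to stash the old IST value
 * at the new top. A nested #VC therefore starts below the live frame,
 * and __sev_es_ist_exit() can pop the stored value to undo the
 * adjustment.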
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}

static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
	char __user *target = (char __user *)dst;
	u64 d8;
	u32 d4;
	u16 d2;
	u8 d1;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1:
		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	case 2:
		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	case 4:
		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	case 8:
		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;
	char __user *s = (char __user *)src;
	u64 d8;
	u32 d4;
	u16 d2;
	u8 d1;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
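	 *
	 * For example (informal illustration of the code in this file): a
	 * MOVS targeting MMIO is emulated by vc_handle_mmio_movs(), which
	 * calls vc_read_mem() and vc_write_mem(). If either helper used a
	 * string instruction touching the MMIO range itself, that access
	 * would raise a fresh #VC whose emulation would land here again.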
	 */
	switch (size) {
	case 1:
		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	case 2:
		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	case 4:
		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	case 8:
		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"

static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
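		 *
		 * (vc_ghcb_invalidate() is provided by sev-shared.c; as used
		 * here it clears the recorded exit code and the GHCB valid
		 * bitmap, so stale register state is no longer marked as
		 * supplied to the hypervisor.)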
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}

static u64 get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}

static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	u64 exit_info_1;

	/* Is it a WRMSR? */
	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
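	/*
	 * GHCB protocol for SVM_EXIT_MSR as used below (opcode 0f 30 is
	 * WRMSR, 0f 32 is RDMSR): exit_info_1 selects the direction (1 =
	 * WRMSR, 0 = RDMSR), RCX carries the MSR index, and RAX/RDX carry
	 * the value split into low/high 32-bit halves - written into the
	 * GHCB for WRMSR, read back from it on a successful RDMSR.
	 */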

	ghcb_set_rcx(ghcb, regs->cx);
	if (exit_info_1) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
				  exit_info_1, 0);

	if ((ret == ES_OK) && (!exit_info_1)) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}

/*
 * This function runs on the first #VC exception after the kernel
 * switched to virtual addresses.
 */
static bool __init sev_es_setup_ghcb(void)
{
	/* First make sure the hypervisor talks a supported protocol. */
	if (!sev_es_negotiate_protocol())
		return false;

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	return true;
}

#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
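	 *
	 * The wakeup itself is signalled through the AP HLT loop VMGEXIT
	 * above: sev_es_ap_hlt_loop() only returns once the hypervisor has
	 * handed back a non-zero sw_exit_info_2 in the GHCB.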
	 */
	start_cpu0();
}
#else /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/* Enable SEV-ES special handling */
	static_branch_enable(&sev_es_enable_key);

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
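	/*
	 * MMIO VMGEXIT layout as used below: exit_info_1 carries the guest
	 * physical address of the access, exit_info_2 the number of bytes
	 * (at most 8), and sw_scratch points at the GHCB shared_buffer
	 * through which the data itself is exchanged with the hypervisor.
	 */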

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
}

static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
						 struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	int sign_byte;
	long *reg_data;

	switch (insn->opcode.bytes[1]) {
	/* MMIO Read w/ zero-extension */
	case 0xb6:
		bytes = 1;
		fallthrough;
	case 0xb7:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memset(reg_data, 0, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MMIO Read w/ sign-extension */
	case 0xbe:
		bytes = 1;
		fallthrough;
	case 0xbf:
		if (!bytes)
			bytes = 2;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Sign extend based on operand size */
		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}
		memset(reg_data, sign_byte, insn->opnd_bytes);

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
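 *
 * Note on the flow below: each emulated pass moves a single element via
 * vc_read_mem()/vc_write_mem(), advances RSI/RDI (and RCX for REP), and
 * returns ES_RETRY. ES_RETRY leaves RIP untouched, so the MOVS is simply
 * re-executed and faults again until RCX reaches zero.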
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}

static enum es_result vc_handle_mmio(struct ghcb *ghcb,
				     struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	unsigned int bytes = 0;
	enum es_result ret;
	long *reg_data;

	switch (insn->opcode.bytes[0]) {
	/* MMIO Write */
	case 0x88:
		bytes = 1;
		fallthrough;
	case 0x89:
		if (!bytes)
			bytes = insn->opnd_bytes;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		memcpy(ghcb->shared_buffer, reg_data, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	case 0xc6:
		bytes = 1;
		fallthrough;
	case 0xc7:
		if (!bytes)
			bytes = insn->opnd_bytes;

		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);

		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;

	/* MMIO Read */
	case 0x8a:
		bytes = 1;
		fallthrough;
	case 0x8b:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		reg_data = vc_insn_get_reg(ctxt);
		if (!reg_data)
			return ES_DECODE_FAILED;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;

	/* MOVS instruction */
	case 0xa4:
		bytes = 1;
		fallthrough;
	case 0xa5:
		if (!bytes)
			bytes = insn->opnd_bytes;

		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	/* Two-Byte Opcodes */
	case 0x0f:
		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
		break;
	default:
		ret = ES_UNSUPPORTED;
	}

	return ret;
}

static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
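	 *
	 * Concretely, filling in ctxt->fi and returning ES_EXCEPTION makes
	 * vc_raw_handle_exception() release the GHCB and then invoke
	 * exc_alignment_check() via vc_forward_exception().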
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;
	prev_sp = regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
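	 *
	 * For orientation: sp is where this exception's pt_regs landed (the
	 * stack the handler itself runs on), prev_sp is the stack of the
	 * interrupted context. The combination "handler on VC2, interrupted
	 * code not on VC2" is what gets reported as an invalid context.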
	 */
	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
}

static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;
	bool ret = true;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	__sev_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

	return ret;
}

static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to a bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}

/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
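 *
 * In contrast to the kernel-mode handler above, this one enters as a
 * regular interrupt (irqentry_enter_from_user_mode()) rather than in NMI
 * mode, and a failed emulation only kills the offending task instead of
 * terminating the guest.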
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}

bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	/* Do initial setup or terminate the guest */
	if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	while (true)
		halt();
}