/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/boot/head.S
 *
 *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
 */

/*
 * head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc elsewhere on a compressed
 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there. This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */

/*
 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
 */
	.code32
	.text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>
#include "pgtable.h"

/*
 * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT here would
 * result in assembly errors from trying to move .org backward, due to the
 * excessive alignment.
 */
#undef __ALIGN
#define __ALIGN		.balign	16, 0x90

/*
 * Locally defined symbols should be marked hidden:
 */
	.hidden _bss
	.hidden _ebss
	.hidden _end

	__HEAD

/*
 * This macro gives the relative virtual address of X, i.e. the offset of X
 * from startup_32. This is the same as the link-time virtual address of X,
 * since startup_32 is at 0, but defining it this way tells the
 * assembler/linker that we do not want the actual run-time address of X. This
 * prevents the linker from trying to create unwanted run-time relocation
 * entries for the reference when the compressed kernel is linked as PIE.
 *
 * A reference X(%reg) will result in the link-time VA of X being stored with
 * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
 * adds the 64-bit base address where the kernel is loaded.
 *
 * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
 * and no run-time relocation.
 *
 * The macro should be used as a displacement with a base register containing
 * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
 * [$ rva(X)].
 *
 * This macro can only be used from within the .head.text section, since the
 * expression requires startup_32 to be in the same section as the code being
 * assembled.
 */
#define rva(X) ((X) - startup_32)
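/*
 * Illustration (hypothetical numbers): for a symbol foo at link-time
 * VA 0x40,
 *
 *	movl	foo(%ebp), %eax		- displacement 0x40 plus an
 *					  R_X86_64_RELATIVE entry
 *	movl	rva(foo)(%ebp), %eax	- displacement 0x40, no entry
 *
 * The encoded instructions start out identical; rva() merely keeps the
 * linker from emitting a relocation entry that nothing in this stage
 * would ever process.
 */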
	.code32
SYM_FUNC_START(startup_32)
	/*
	 * 32bit entry is 0 and it is ABI so immutable!
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk), ramdisk, zero_page and command line
	 * all need to be under the 4G limit.
	 */
	cld
	cli

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at. This can only be done
 * with a short local call on x86. Nothing else will tell us what
 * address we are running at. The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$ rva(1b), %ebp
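/*
 * Worked example (addresses hypothetical): if the image was loaded at
 * 0x800000, the popl above yields 0x800000 + rva(1b); subtracting the
 * assemble-time constant rva(1b) leaves %ebp holding 0x800000, the
 * run-time address of startup_32.
 */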
	/* Load new GDT with the 64bit segments using 32bit descriptor */
	leal	rva(gdt)(%ebp), %eax
	movl	%eax, 2(%eax)
	lgdt	(%eax)

	/* Load segment registers with our descriptors */
	movl	$__BOOT_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	movl	%eax, %ss

	/* Set up a stack and load CS from the current GDT */
	leal	rva(boot_stack_end)(%ebp), %esp

	pushl	$__KERNEL32_CS
	leal	rva(1f)(%ebp), %eax
	pushl	%eax
	lretl
1:

	/* Set up exception handling for SEV-ES */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call	startup32_load_idt
#endif

	/* Make sure the CPU supports long mode. */
	call	verify_cpu
	testl	%eax, %eax
	jnz	.Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:
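/*
 * The CONFIG_RELOCATABLE sequence above is the usual round-up idiom,
 * ebx = (ebp + align - 1) & ~(align - 1). E.g. (hypothetical values) a
 * load address of 0x345000 with a 2M kernel_alignment rounds up to
 * 0x400000.
 */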
	/* Target address to relocate to for decompression */
	addl	BP_init_size(%esi), %ebx
	subl	$ rva(_end), %ebx

/*
 * Prepare for entering 64 bit mode
 */

	/* Enable PAE mode */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

 /*
  * Build early 4G boot pagetable
  */
	/*
	 * If SEV is active then set the encryption mask in the page tables.
	 * This ensures that the kernel is copied and decompressed with
	 * encryption already in place.
	 */
	xorl	%edx, %edx
#ifdef	CONFIG_AMD_MEM_ENCRYPT
	call	get_sev_encryption_bit
	xorl	%edx, %edx
	testl	%eax, %eax
	jz	1f
	subl	$32, %eax	/* Encryption bit is always above bit 31 */
	bts	%eax, %edx	/* Set encryption mask for page tables */
	/*
	 * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
	 * startup32_check_sev_cbit() will do a check. sev_enable() will
	 * initialize sev_status with all the bits reported by the SEV
	 * status MSR later, but only MSR_AMD64_SEV_ENABLED_BIT needs to
	 * be set for now.
	 */
	movl	$1, rva(sev_status)(%ebp)
1:
#endif

	/* Initialize Page tables to 0 */
	leal	rva(pgtable)(%ebx), %edi
	xorl	%eax, %eax
	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
	rep	stosl

	/* Build Level 4 */
	leal	rva(pgtable + 0)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)

	/* Build Level 3 */
	leal	rva(pgtable + 0x1000)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, 0x00(%edi)
	addl	%edx, 0x04(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Build Level 2 */
	leal	rva(pgtable + 0x2000)(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b
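/*
 * The three build steps above identity-map the first 4G: one Level 4
 * entry, four Level 3 entries and 4 * 512 = 2048 Level 2 entries of 2M
 * each. The low bits of the constants are the entry flags: 0x007 is
 * PRESENT+RW+USER on the table links, 0x183 is PRESENT+RW+PSE+GLOBAL
 * on the 2M leaf mappings.
 */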
	/* Enable the boot page tables */
	leal	rva(pgtable)(%ebx), %eax
	movl	%eax, %cr3

	/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl	$__BOOT_TSS, %eax
	ltr	%ax

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/* Check if the C-bit position is correct when SEV is active */
	call	startup32_check_sev_cbit
#endif

	/*
	 * Setup for the jump to 64bit mode
	 *
	 * When the jump is performed we will be in long mode but
	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
	 * We place all of the values on our mini stack so lret can be
	 * used to perform that far jump.
	 */
	leal	rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
	cmpb	$1, rva(efi_is64)(%ebp)
	je	1f
	leal	rva(startup_64_mixed_mode)(%ebp), %eax
1:
#endif

	pushl	$__KERNEL_CS
	pushl	%eax

	/* Enter paged protected mode, activating long mode */
	movl	$CR0_STATE, %eax
	movl	%eax, %cr0

	/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
SYM_FUNC_END(startup_32)

	.code64
	.org 0x200
SYM_CODE_START(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader.
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page and command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */

	cld
	cli

	/* Set up data segments. */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs
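/*
 * Null selectors suffice here: in 64-bit mode the CPU treats the
 * DS/ES/SS bases as zero and does not perform limit checks, and FS/GS
 * would take their bases from MSRs anyway.
 */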
	/*
	 * Compute the decompressed kernel start address. It is the address
	 * we were loaded at, aligned to a 2M boundary. %rbp contains the
	 * decompressed kernel start address.
	 *
	 * If it is a relocatable kernel then decompress and run the kernel
	 * from the load address aligned to 2MB, otherwise decompress and
	 * run the kernel from LOAD_PHYSICAL_ADDR.
	 *
	 * We cannot rely on the calculation done in 32-bit mode, since we
	 * may have been invoked via the 64-bit entry point.
	 */

	/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Target address to relocate to for decompression */
	movl	BP_init_size(%rsi), %ebx
	subl	$ rva(_end), %ebx
	addq	%rbp, %rbx

	/* Set up the stack */
	leaq	rva(boot_stack_end)(%rbx), %rsp

	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we might want to enable 5-level paging or vice versa.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, e.g. when starting a 4-level paging kernel via kexec() and
	 * the original kernel worked in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 */

	/* Make sure we have a GDT with a 32-bit code segment */
	leaq	gdt64(%rip), %rax
	addq	%rax, 2(%rax)
	lgdt	(%rax)
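/*
 * The base field of gdt64 initially holds the link-time offset
 * (gdt - gdt64), see the .data definition below, so adding the run-time
 * address of gdt64 turns it into the GDT's run-time linear address
 * before LGDT consumes it. startup_32 plays the same trick with the
 * 32-bit descriptor embedded at the start of gdt itself.
 */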
	/* Reload CS so IRET returns to a CS actually in the GDT */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq

.Lon_kernel_cs:
	/*
	 * RSI holds a pointer to a boot_params structure provided by the
	 * loader, and this needs to be preserved across C function calls. So
	 * move it into a callee saved register.
	 */
	movq	%rsi, %r15

	call	load_stage1_idt

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Now that the stage1 interrupt handlers are set up, #VC exceptions from
	 * CPUID instructions can be properly handled for SEV-ES guests.
	 *
	 * For SEV-SNP, the CPUID table also needs to be set up in advance of any
	 * CPUID instructions being issued, so go ahead and do that now via
	 * sev_enable(), which will also handle the rest of the SEV-related
	 * detection/setup to ensure that has been done in advance of any dependent
	 * code. Pass the boot_params pointer as the first argument.
	 */
	movq	%r15, %rdi
	call	sev_enable
#endif

	/* Preserve only the CR4 bits that must be preserved, and clear the rest */
	movq	%cr4, %rax
	andl	$(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
	movq	%rax, %cr4

	/*
	 * configure_5level_paging() updates the number of paging levels using
	 * a trampoline in 32-bit addressable memory if the current number does
	 * not match the desired number.
	 *
	 * Pass the boot_params pointer as the first argument. The second
	 * argument is the relocated address of the page table to use instead
	 * of the page table in trampoline memory (if required).
	 */
	movq	%r15, %rdi
	leaq	rva(top_pgtable)(%rbx), %rsi
	call	configure_5level_paging

	/* Zero EFLAGS */
	pushq	$0
	popfq

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
	leaq	(_bss-8)(%rip), %rsi
	leaq	rva(_bss-8)(%rbx), %rdi
	movl	$(_bss - startup_32), %ecx
	shrl	$3, %ecx
	std
	rep	movsq
	cld
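/*
 * The copy runs backwards (note the STD) because the two ranges may
 * overlap: the destination lies at a higher address, so starting from
 * the last quadword ensures no source byte is clobbered before it has
 * been read.
 */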
	/*
	 * The GDT may get overwritten either during the copy we just did or
	 * during extract_kernel below. To avoid any issues, repoint the GDTR
	 * to the new copy of the GDT.
	 */
	leaq	rva(gdt64)(%rbx), %rax
	leaq	rva(gdt)(%rbx), %rdx
	movq	%rdx, 2(%rax)
	lgdt	(%rax)

/*
 * Jump to the relocated address.
 */
	leaq	rva(.Lrelocated)(%rbx), %rax
	jmp	*%rax
SYM_CODE_END(startup_64)

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Clear BSS (stack is currently empty)
 */
	xorl	%eax, %eax
	leaq	_bss(%rip), %rdi
	leaq	_ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

	call	load_stage2_idt

	/* Pass boot_params to initialize_identity_maps() */
	movq	%r15, %rdi
	call	initialize_identity_maps

/*
 * Do the extraction, and jump to the new kernel.
 */
	/* pass struct boot_params pointer and output target address */
	movq	%r15, %rdi
	movq	%rbp, %rsi
	call	extract_kernel		/* returns kernel entry point in %rax */
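/*
 * The 64-bit boot protocol expects the boot_params pointer in %rsi on
 * entry to the kernel proper, which is why it is moved back out of the
 * callee-saved %r15 before the indirect jump below.
 */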
/*
 * Jump to the decompressed kernel.
 */
	movq	%r15, %rsi
	jmp	*%rax
SYM_FUNC_END(.Lrelocated)

/*
 * This is the 32-bit trampoline that will be copied over to low memory. It
 * will be called using the ordinary 64-bit calling convention from code
 * running in 64-bit mode.
 *
 * Return address is at the top of the stack (might be above 4G).
 * The first argument (EDI) contains the address of the temporary PGD level
 * page table in 32-bit addressable memory which will be programmed into
 * register CR3.
 */
	.section ".rodata", "a", @progbits
SYM_CODE_START(trampoline_32bit_src)
	/*
	 * Preserve callee save 64-bit registers on the stack: this is
	 * necessary because the architecture does not guarantee that GPRs will
	 * retain their full 64-bit values across a 32-bit mode switch.
	 */
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx

	/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
	movq	%rsp, %rbx
	shrq	$32, %rbx

	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	pushq	$__KERNEL32_CS
	leaq	0f(%rip), %rax
	pushq	%rax
	lretq

	/*
	 * The 32-bit code below will do a far jump back to long mode and end
	 * up here after reconfiguring the number of paging levels. First, the
	 * stack pointer needs to be restored to its full 64-bit value before
	 * the callee save register contents can be popped from the stack.
	 */
.Lret:
	shlq	$32, %rbx
	orq	%rbx, %rsp

	/* Restore the preserved 64-bit registers */
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	retq

	.code32
0:
	/* Disable paging */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/* Point CR3 to the trampoline's new top level page table */
	movl	%edi, %cr3

	/* Set EFER.LME=1 as a precaution in case the hypervisor pulls the rug */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	/* Avoid writing EFER if no change was made (for TDX guest) */
	jc	1f
	wrmsr
1:
	/* Toggle CR4.LA57 */
	movl	%cr4, %eax
	btcl	$X86_CR4_LA57_BIT, %eax
	movl	%eax, %cr4

	/* Enable paging again. */
	movl	%cr0, %eax
	btsl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0
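	/*
	 * Because EFER.LME stayed set, the CR0 write above re-activated
	 * long mode (EFER.LMA = 1) with the new CR4.LA57 value; execution
	 * continues in compatibility mode until the far jump below reloads
	 * a 64-bit CS.
	 */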
	/*
	 * Return to the 64-bit calling code using LJMP rather than LRET, to
	 * avoid the need for a 32-bit addressable stack. The destination
	 * address will be adjusted after the template code is copied into a
	 * 32-bit addressable buffer.
	 */
.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
SYM_CODE_END(trampoline_32bit_src)

/*
 * This symbol is placed right after trampoline_32bit_src() so its address can
 * be used to infer the size of the trampoline code.
 */
SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)

	/*
	 * The trampoline code has a size limit.
	 * Make sure we fail to compile if the trampoline code grows
	 * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
	/* This isn't an x86-64 CPU, so hang intentionally; we cannot continue */
1:
	hlt
	jmp	1b
SYM_FUNC_END(.Lno_longmode)

	.globl	verify_cpu
#include "../../kernel/verify_cpu.S"

	.data
SYM_DATA_START_LOCAL(gdt64)
	.word	gdt_end - gdt - 1
	.quad	gdt - gdt64
SYM_DATA_END(gdt64)
	.balign	8
SYM_DATA_START_LOCAL(gdt)
	.word	gdt_end - gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad	0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
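/*
 * For reference: __KERNEL32_CS and __KERNEL_CS above differ only in the
 * descriptor flags nibble, 0xc (G + D, 32-bit default) vs. 0xa
 * (G + L, long mode) - exactly the distinction the mode switches in
 * this file rely on.
 */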
SYM_DATA_START(boot_idt_desc)
	.word	boot_idt_end - boot_idt - 1
	.quad	0
SYM_DATA_END(boot_idt_desc)
	.balign 8
SYM_DATA_START(boot_idt)
	.rept	BOOT_IDT_ENTRIES
	.quad	0
	.quad	0
	.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

/*
 * Stack and heap for decompression
 */
	.bss
	.balign 4
SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
	.balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

/*
 * Space for page tables (not in .bss so not zeroed)
 */
	.section ".pgtable","aw",@nobits
	.balign 4096
SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)

/*
 * This page table will be used instead of the page table in trampoline
 * memory.
 */
SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)
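/*
 * Keeping these tables out of .bss is deliberate: when entered via
 * startup_32, CR3 already points into the relocated pgtable by the
 * time .Lrelocated clears _bss.._ebss, so placing them in .bss would
 * zero the live page tables.
 */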