/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/boot/head.S
 *
 *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
 */

/*
 *  head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc elsewhere on a compressed
 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there.  This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */

/*
 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
 */
	.code32
	.text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>
#include "pgtable.h"

/*
 * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT would result
 * in assembly errors, because the excessive alignment would force .org
 * directives to move backward.
 */
#undef __ALIGN
#define __ALIGN		.balign	16, 0x90

/*
 * Locally defined symbols should be marked hidden:
 */
	.hidden _bss
	.hidden _ebss
	.hidden _end

	__HEAD

/*
 * This macro gives the relative virtual address of X, i.e. the offset of X
 * from startup_32. This is the same as the link-time virtual address of X,
 * since startup_32 is at 0, but defining it this way tells the
 * assembler/linker that we do not want the actual run-time address of X. This
 * prevents the linker from trying to create unwanted run-time relocation
 * entries for the reference when the compressed kernel is linked as PIE.
 *
 * A reference X(%reg) will result in the link-time VA of X being stored with
 * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
 * adds the 64-bit base address where the kernel is loaded.
 *
 * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
 * and no run-time relocation.
 *
 * The macro should be used as a displacement with a base register containing
 * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
 * [$ rva(X)].
 *
 * This macro can only be used from within the .head.text section, since the
 * expression requires startup_32 to be in the same section as the code being
 * assembled.
 */
#define rva(X) ((X) - startup_32)
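
/*
 * Illustrative example (added for clarity, not in the original source):
 * with the run-time base address of the image in %ebp,
 *
 *	movl	gdt(%ebp), %eax		# stores the link-time VA of gdt and
 *					# needs an R_X86_64_RELATIVE fixup
 *	movl	rva(gdt)(%ebp), %eax	# stores only the offset; no fixup
 */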

	.code32
SYM_FUNC_START(startup_32)
	/*
	 * 32bit entry is 0 and it is ABI so immutable!
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk), ramdisk, zero_page, command line
	 * all need to be under the 4G limit.
	 */
	cld
	cli

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at.  This can only be done
 * with a short local call on x86.  Nothing else will tell us what
 * address we are running at.  The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$ rva(1b), %ebp
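	/*
	 * Illustrative note (added for clarity): CALL pushes the run-time
	 * address of label 1, so after the POPL,
	 *
	 *	%ebp = run-time address of 1b
	 *	%ebp - rva(1b) = run-time address of startup_32
	 *
	 * E.g. loaded at 0x1000000 with 1b at offset 0x40, the POPL yields
	 * 0x1000040 and the SUBL leaves 0x1000000 (the load address) in %ebp.
	 */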

	/* Load new GDT with the 64bit segments using 32bit descriptor */
	leal	rva(gdt)(%ebp), %eax
	movl	%eax, 2(%eax)
	lgdt	(%eax)
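	/*
	 * Note (added for clarity): the operand of LGDT is a 2-byte limit
	 * followed by the base address. The MOVL above stores the run-time
	 * address of "gdt" into that base field at offset 2, patching the
	 * statically built descriptor before it is loaded.
	 */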

	/* Load segment registers with our descriptors */
	movl	$__BOOT_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	movl	%eax, %ss

	/* Setup a stack and load CS from current GDT */
	leal	rva(boot_stack_end)(%ebp), %esp

	pushl	$__KERNEL32_CS
	leal	rva(1f)(%ebp), %eax
	pushl	%eax
	lretl
1:

	/* Setup Exception handling for SEV-ES */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call	startup32_load_idt
#endif

	/* Make sure cpu supports long mode. */
	call	verify_cpu
	testl	%eax, %eax
	jnz	.Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:
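	/*
	 * Note (added for clarity): the DECL/ADDL/NOTL/ANDL sequence above
	 * rounds %ebx up to the bootloader-supplied alignment:
	 *
	 *	aligned = (addr + align - 1) & ~(align - 1)
	 *
	 * which works because kernel_alignment is a power of two.
	 */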

	/* Target address to relocate to for decompression */
	addl	BP_init_size(%esi), %ebx
	subl	$ rva(_end), %ebx
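	/*
	 * Note (added for clarity): init_size is the space the kernel needs
	 * for safe in-place decompression, so placing the copy
	 * init_size - rva(_end) bytes past the run address puts the end of
	 * the compressed image exactly at the end of that buffer.
	 */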

/*
 * Prepare for entering 64 bit mode
 */

	/* Enable PAE mode */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

/*
 * Build early 4G boot pagetable
 */
	/*
	 * If SEV is active then set the encryption mask in the page tables.
	 * This ensures that the kernel is copied and decompressed through
	 * encrypted mappings.
	 */
	xorl	%edx, %edx
#ifdef	CONFIG_AMD_MEM_ENCRYPT
	call	get_sev_encryption_bit
	xorl	%edx, %edx
	testl	%eax, %eax
	jz	1f
	subl	$32, %eax	/* Encryption bit is always above bit 31 */
	bts	%eax, %edx	/* Set encryption mask for page tables */
	/*
	 * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
	 * startup32_check_sev_cbit() will do a check. sev_enable() will
	 * initialize sev_status with all the bits reported by the SEV status
	 * MSR (MSR_AMD64_SEV) later, but only MSR_AMD64_SEV_ENABLED_BIT
	 * needs to be set for now.
	 */
	movl	$1, rva(sev_status)(%ebp)
1:
#endif

	/* Initialize Page tables to 0 */
	leal	rva(pgtable)(%ebx), %edi
	xorl	%eax, %eax
	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
	rep	stosl

	/* Build Level 4 */
	leal	rva(pgtable + 0)(%ebx), %edi
	leal	0x1007 (%edi), %eax
	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)

	/* Build Level 3 */
	leal	rva(pgtable + 0x1000)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, 0x00(%edi)
	addl	%edx, 0x04(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Build Level 2 */
	leal	rva(pgtable + 0x2000)(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b
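	/*
	 * Note (added for clarity): the loops above identity-map the first
	 * 4G: one level-4 entry points at a level-3 page whose four entries
	 * point at four adjacent level-2 pages, whose 4 * 512 = 2048 entries
	 * each map a 2M page (2048 * 2M = 4G). 0x1007 is the offset of the
	 * next table plus PRESENT+RW+USER; 0x183 is PRESENT+RW+PSE(2M)+GLOBAL.
	 * %edx carries the SEV encryption mask, if any, into the upper half
	 * of each 64-bit entry.
	 */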

	/* Enable the boot page tables */
	leal	rva(pgtable)(%ebx), %eax
	movl	%eax, %cr3

	/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl	$__BOOT_TSS, %eax
	ltr	%ax

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/* Check if the C-bit position is correct when SEV is active */
	call	startup32_check_sev_cbit
#endif

	/*
	 * Setup for the jump to 64bit mode
	 *
	 * When the jump is performed we will be in long mode but
	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
	 * We place all of the values on our mini stack so lret can be
	 * used to perform that far jump.
	 */
	leal	rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
	cmpb	$1, rva(efi_is64)(%ebp)
	je	1f
	leal	rva(startup_64_mixed_mode)(%ebp), %eax
1:
#endif

	pushl	$__KERNEL_CS
	pushl	%eax

	/* Enter paged protected Mode, activating Long Mode */
	movl	$CR0_STATE, %eax
	movl	%eax, %cr0

	/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
SYM_FUNC_END(startup_32)

	.code64
	.org 0x200
SYM_CODE_START(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader.
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page, command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */

	cld
	cli

	/* Setup data segments. */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Compute the decompressed kernel start address.  It is the address
	 * we were loaded at, aligned to a 2M boundary. %rbp contains the
	 * decompressed kernel start address.
	 *
	 * If it is a relocatable kernel then decompress and run the kernel
	 * from the load address aligned to a 2M boundary, otherwise
	 * decompress and run the kernel from LOAD_PHYSICAL_ADDR.
	 *
	 * We cannot rely on the calculation done in 32-bit mode, since we
	 * may have been invoked via the 64-bit entry point.
	 */

	/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Target address to relocate to for decompression */
	movl	BP_init_size(%rsi), %ebx
	subl	$ rva(_end), %ebx
	addq	%rbp, %rbx

	/* Set up the stack */
	leaq	rva(boot_stack_end)(%rbx), %rsp

	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we might want to enable 5-level paging or vice versa.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, such as when starting a 4-level paging kernel via kexec()
	 * while the original kernel ran in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 */

	/* Make sure we have GDT with 32-bit code segment */
	leaq	gdt64(%rip), %rax
	addq	%rax, 2(%rax)
	lgdt	(%rax)

	/* Reload CS so IRET returns to a CS actually in the GDT */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq

.Lon_kernel_cs:
	/*
	 * RSI holds a pointer to a boot_params structure provided by the
	 * loader, and this needs to be preserved across C function calls. So
	 * move it into a callee saved register.
	 */
	movq	%rsi, %r15

	call	load_stage1_idt

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Now that the stage1 interrupt handlers are set up, #VC exceptions from
	 * CPUID instructions can be properly handled for SEV-ES guests.
	 *
	 * For SEV-SNP, the CPUID table also needs to be set up in advance of any
	 * CPUID instructions being issued, so go ahead and do that now via
	 * sev_enable(), which will also handle the rest of the SEV-related
	 * detection/setup to ensure that has been done in advance of any dependent
	 * code. Pass the boot_params pointer as the first argument.
	 */
	movq	%r15, %rdi
	call	sev_enable
#endif

	/* Preserve only the CR4 bits that must be preserved, and clear the rest */
	movq	%cr4, %rax
	andl	$(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
	movq	%rax, %cr4

	/*
	 * configure_5level_paging() updates the number of paging levels using
	 * a trampoline in 32-bit addressable memory if the current number does
	 * not match the desired number.
	 *
	 * Pass the boot_params pointer as the first argument. The second
	 * argument is the relocated address of the page table to use instead
	 * of the page table in trampoline memory (if required).
	 */
	movq	%r15, %rdi
	leaq	rva(top_pgtable)(%rbx), %rsi
	call	configure_5level_paging

	/* Zero EFLAGS */
	pushq	$0
	popfq

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
	leaq	(_bss-8)(%rip), %rsi
	leaq	rva(_bss-8)(%rbx), %rdi
	movl	$(_bss - startup_32), %ecx
	shrl	$3, %ecx
	std
	rep	movsq
	cld
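	/*
	 * Note (added for clarity): the copy runs backwards (STD, starting at
	 * _bss-8 and moving down) so that it is safe even when the source and
	 * destination buffers overlap, with the destination at the higher
	 * address.
	 */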

	/*
	 * The GDT may get overwritten either during the copy we just did or
	 * during extract_kernel below. To avoid any issues, repoint the GDTR
	 * to the new copy of the GDT.
	 */
	leaq	rva(gdt64)(%rbx), %rax
	leaq	rva(gdt)(%rbx), %rdx
	movq	%rdx, 2(%rax)
	lgdt	(%rax)

/*
 * Jump to the relocated address.
 */
	leaq	rva(.Lrelocated)(%rbx), %rax
	jmp	*%rax
SYM_CODE_END(startup_64)

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Clear BSS (stack is currently empty)
 */
	xorl	%eax, %eax
	leaq	_bss(%rip), %rdi
	leaq	_ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

	call	load_stage2_idt

	/* Pass boot_params to initialize_identity_maps() */
	movq	%r15, %rdi
	call	initialize_identity_maps

/*
 * Do the extraction, and jump to the new kernel.
 */
	/* pass struct boot_params pointer and output target address */
	movq	%r15, %rdi
	movq	%rbp, %rsi
	call	extract_kernel		/* returns kernel entry point in %rax */

/*
 * Jump to the decompressed kernel.
 */
	movq	%r15, %rsi
	jmp	*%rax
SYM_FUNC_END(.Lrelocated)

/*
 * This is the 32-bit trampoline that will be copied over to low memory. It
 * will be called using the ordinary 64-bit calling convention from code
 * running in 64-bit mode.
 *
 * Return address is at the top of the stack (might be above 4G).
 * The first argument (EDI) contains the address of the temporary PGD level
 * page table in 32-bit addressable memory which will be programmed into
 * register CR3.
 */
	.section ".rodata", "a", @progbits
SYM_CODE_START(trampoline_32bit_src)
	/*
	 * Preserve callee save 64-bit registers on the stack: this is
	 * necessary because the architecture does not guarantee that GPRs will
	 * retain their full 64-bit values across a 32-bit mode switch.
	 */
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx

	/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
	movq	%rsp, %rbx
	shrq	$32, %rbx
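	/*
	 * Illustrative note (added for clarity): only the low 32 bits of RSP
	 * survive the round trip through 32-bit mode, so the high half is
	 * parked in %rbx. E.g. RSP = 0x0000000480001000 leaves 0x4 in %rbx
	 * here, to be shifted back and OR-ed into RSP at .Lret below.
	 */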

	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	pushq	$__KERNEL32_CS
	leaq	0f(%rip), %rax
	pushq	%rax
	lretq

	/*
	 * The 32-bit code below will do a far jump back to long mode and end
	 * up here after reconfiguring the number of paging levels. First, the
	 * stack pointer needs to be restored to its full 64-bit value before
	 * the callee save register contents can be popped from the stack.
	 */
.Lret:
	shlq	$32, %rbx
	orq	%rbx, %rsp

	/* Restore the preserved 64-bit registers */
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	retq

	.code32
0:
	/* Disable paging */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/* Point CR3 to the trampoline's new top level page table */
	movl	%edi, %cr3

	/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	/*
	 * BTSL left the previous value of the LME bit in CF, so skip the
	 * WRMSR if no change was made (for TDX guest).
	 */
	jc	1f
	wrmsr
1:
	/* Toggle CR4.LA57 */
	movl	%cr4, %eax
	btcl	$X86_CR4_LA57_BIT, %eax
	movl	%eax, %cr4

	/* Enable paging again. */
	movl	%cr0, %eax
	btsl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/*
	 * Return to the 64-bit calling code using LJMP rather than LRET, to
	 * avoid the need for a 32-bit addressable stack. The destination
	 * address will be adjusted after the template code is copied into a
	 * 32-bit addressable buffer.
	 */
.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
SYM_CODE_END(trampoline_32bit_src)

/*
 * This symbol is placed right after trampoline_32bit_src() so its address can
 * be used to infer the size of the trampoline code.
 */
SYM_DATA(trampoline_ljmp_imm_offset, .word  .Ljmp + 1 - trampoline_32bit_src)
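
/*
 * Illustrative note (added for clarity): LJMPL encodes as an opcode byte
 * followed by a 32-bit offset, so .Ljmp + 1 is the offset of that immediate
 * within the trampoline template. The copy of the trampoline initially still
 * targets the template-relative offset of .Lret; the caller can use this
 * value to locate and fix up the immediate to the absolute address of .Lret
 * within the 32-bit addressable copy.
 */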

	/*
	 * The trampoline code has a size limit.
	 * Make sure we fail to compile if the trampoline code grows
	 * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
	/* This isn't an x86-64 CPU, so hang intentionally; we cannot continue */
1:
	hlt
	jmp	1b
SYM_FUNC_END(.Lno_longmode)

	.globl	verify_cpu
#include "../../kernel/verify_cpu.S"

	.data
SYM_DATA_START_LOCAL(gdt64)
	.word	gdt_end - gdt - 1
	.quad	gdt - gdt64
SYM_DATA_END(gdt64)
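/*
 * Note (added for clarity): the base field of this GDT descriptor initially
 * holds the link-time offset of gdt relative to gdt64; startup_64 converts
 * it into an absolute address at run time (addq %rax, 2(%rax)) before LGDT,
 * so no load-time relocation entry is needed.
 */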
	.balign	8
SYM_DATA_START_LOCAL(gdt)
	.word	gdt_end - gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad	0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

SYM_DATA_START(boot_idt_desc)
	.word	boot_idt_end - boot_idt - 1
	.quad	0
SYM_DATA_END(boot_idt_desc)
	.balign 8
SYM_DATA_START(boot_idt)
	.rept	BOOT_IDT_ENTRIES
	.quad	0
	.quad	0
	.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

/*
 * Stack and heap for decompression
 */
	.bss
	.balign 4
SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
	.balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

/*
 * Space for page tables (not in .bss so not zeroed)
 */
	.section ".pgtable","aw",@nobits
	.balign 4096
SYM_DATA_LOCAL(pgtable,		.fill BOOT_PGT_SIZE, 1, 0)

/*
 * This page table is going to be used instead of the page table in the
 * trampoline memory.
 */
SYM_DATA_LOCAL(top_pgtable,	.fill PAGE_SIZE, 1, 0)