/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/boot/head.S
 *
 *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
 */

/*
 *  head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc elsewhere on a compressed
 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there.  This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */

/*
 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
 */
	.code32
	.text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include "pgtable.h"

/*
 * Locally defined symbols should be marked hidden:
 */
	.hidden _bss
	.hidden _ebss
	.hidden _got
	.hidden _egot

/*
 * startup_32 - 32-bit entry point.
 *
 * In:	%esi = address of the real-mode data; every BP_* offset used below
 *	       is applied to it.
 *
 * Register roles inside this routine:
 *	%ebp = delta between the link-time and the actual load address
 *	%ebx = address the image is relocated to for in-place decompression
 *	%edx = upper 32 bits OR-ed into every page table entry (SEV mask)
 *
 * The routine builds a 4G identity mapping out of 2M pages, turns on PAE,
 * EFER.LME and paging, and far-returns into 64-bit code.  It never returns
 * to its caller.
 */
	__HEAD
	.code32
SYM_FUNC_START(startup_32)
	/*
	 * 32bit entry is 0 and it is ABI so immutable!
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk) ramdisk, zero_page, command line
	 * all need to be under the 4G limit.
	 */
	cld
	/*
	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
	 * us to not reload segments
	 */
	testb	$KEEP_SEGMENTS, BP_loadflags(%esi)
	jnz	1f

	cli
	movl	$(__BOOT_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
1:

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at.  This can only be done
 * with a short local call on x86.  Nothing else will tell us what
 * address we are running at.  The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$1b, %ebp

/* setup a stack and make sure cpu supports long mode. */
	movl	$boot_stack_end, %eax
	addl	%ebp, %eax
	movl	%eax, %esp

	call	verify_cpu
	testl	%eax, %eax
	jnz	.Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	/* %ebx = load address rounded up to the requested kernel alignment */
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	/*
	 * Physical addresses are unsigned.  A signed compare would treat a
	 * load address at or above 2G as negative and wrongly fall back to
	 * LOAD_PHYSICAL_ADDR.
	 */
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:

	/* Target address to relocate to for decompression */
	movl	BP_init_size(%esi), %eax
	subl	$_end, %eax
	addl	%eax, %ebx

/*
 * Prepare for entering 64 bit mode
 */

	/* Load new GDT with the 64bit segments using 32bit descriptor */
	addl	%ebp, gdt+2(%ebp)	/* relocate the base stored in the descriptor */
	lgdt	gdt(%ebp)

	/* Enable PAE mode */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

 /*
  * Build early 4G boot pagetable
  */
	/*
	 * If SEV is active then set the encryption mask in the page tables.
	 * This will ensure that when the kernel is copied and decompressed
	 * it will be done so encrypted.
	 */
	call	get_sev_encryption_bit
	xorl	%edx, %edx
	testl	%eax, %eax
	jz	1f
	subl	$32, %eax	/* Encryption bit is always above bit 31 */
	bts	%eax, %edx	/* Set encryption mask for page tables */
1:

	/* Initialize Page tables to 0 */
	leal	pgtable(%ebx), %edi
	xorl	%eax, %eax
	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
	rep	stosl

	/* Build Level 4: one entry pointing at the level 3 table at +0x1000 */
	leal	pgtable + 0(%ebx), %edi
	leal	0x1007 (%edi), %eax
	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)

	/* Build Level 3: four entries, one per level 2 table */
	leal	pgtable + 0x1000(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, 0x00(%edi)
	addl	%edx, 0x04(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Build Level 2: 2048 entries, each mapping 2M, starting at address 0 */
	leal	pgtable + 0x2000(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, 0(%edi)
	addl	%edx, 4(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Enable the boot page tables */
	leal	pgtable(%ebx), %eax
	movl	%eax, %cr3

	/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl    $__BOOT_TSS, %eax
	ltr	%ax

	/*
	 * Setup for the jump to 64bit mode
	 *
	 * When the jump is performed we will be in long mode but
	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and in turn EFER.LMA = 1).	To jump into 64bit mode we use
	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
	 * We place all of the values on our mini stack so lret can
	 * be used to perform that far jump.
	 */
	pushl	$__KERNEL_CS
	leal	startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
	/* A non-zero first field of efi32_config redirects to handover_entry */
	movl	efi32_config(%ebp), %ebx
	cmp	$0, %ebx
	jz	1f
	leal	handover_entry(%ebp), %eax
1:
#endif
	pushl	%eax

	/* Enter paged protected Mode, activating Long Mode */
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
	movl	%eax, %cr0

	/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
SYM_FUNC_END(startup_32)

#ifdef CONFIG_EFI_MIXED
/*
 * efi32_stub_entry - 32-bit EFI entry, placed at fixed offset 0x190.
 *
 * Three arguments are taken from the stack above the return address.  The
 * first two are stored at efi32_config+0 and efi32_config+8 (presumably
 * handle and system table, as at the same offsets of efi64_config), and the
 * third becomes %esi, the real-mode data pointer startup_32 expects.
 * Paging is switched off before continuing in startup_32.
 */
	.org 0x190
SYM_FUNC_START(efi32_stub_entry)
	add	$0x4, %esp		/* Discard return address */
	popl	%ecx
	popl	%edx
	popl	%esi

	/* Same call/pop trick as in startup_32: %ebp = load delta */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	pop	%ebp
	subl	$1b, %ebp

	movl	%ecx, efi32_config(%ebp)
	movl	%edx, efi32_config+8(%ebp)
	sgdtl	efi32_boot_gdt(%ebp)

	leal	efi32_config(%ebp), %eax
	movl	%eax, efi_config(%ebp)

	/* Disable paging */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	jmp	startup_32
SYM_FUNC_END(efi32_stub_entry)
#endif

/*
 * startup_64 - 64-bit entry point, placed at fixed offset 0x200.
 *
 * In:	%rsi = address of the real-mode data; it is preserved across every
 *	       call made here and handed on to .Lrelocated.
 *
 * Register roles inside this routine:
 *	%rbp = address the decompressed kernel will start at
 *	%rbx = address this image is copied to before decompression
 *
 * The routine never returns; it ends by jumping to the relocated copy of
 * .Lrelocated.
 */
	.code64
	.org 0x200
SYM_CODE_START(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader.
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page, command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */

	/* Setup data segments. */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Compute the decompressed kernel start address.  It is where
	 * we were loaded at aligned to a 2M boundary. %rbp contains the
	 * decompressed kernel start address.
	 *
	 * If it is a relocatable kernel then decompress and run the kernel
	 * from load address aligned to 2MB addr, otherwise decompress and
	 * run the kernel from LOAD_PHYSICAL_ADDR
	 *
	 * We cannot rely on the calculation done in 32-bit mode, since we
	 * may have been invoked via the 64-bit entry point.
	 */

	/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax	/* 32-bit load zero-extends into %rax */
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	/* Addresses are unsigned, so use an unsigned compare */
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Target address to relocate to for decompression */
	movl	BP_init_size(%rsi), %ebx
	subl	$_end, %ebx
	addq	%rbp, %rbx

	/* Set up the stack */
	leaq	boot_stack_end(%rbx), %rsp

	/*
	 * paging_prepare() and cleanup_trampoline() below can have GOT
	 * references. Adjust the table with address we are running at.
	 *
	 * Zero RAX for adjust_got: the GOT was not adjusted before;
	 * there's no adjustment to undo.
	 */
	xorq	%rax, %rax

	/*
	 * Calculate the address the binary is loaded at and use it as
	 * a GOT adjustment.
	 */
	call	1f
1:	popq	%rdi
	subq	$1b, %rdi

	call	.Ladjust_got

	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we might want to enable 5-level paging or vice versa.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, like when starting 4-level paging kernel via kexec() when
	 * original kernel worked in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 *
	 * We go through the trampoline even if we don't have to: if we're
	 * already in a desired paging mode. This way the trampoline code gets
	 * tested on every boot.
	 */

	/* Make sure we have GDT with 32-bit code segment */
	leaq	gdt(%rip), %rax
	movq	%rax, gdt64+2(%rip)
	lgdt	gdt64(%rip)

	/*
	 * paging_prepare() sets up the trampoline and checks if we need to
	 * enable 5-level paging.
	 *
	 * paging_prepare() returns a two-quadword structure which lands
	 * into RDX:RAX:
	 *   - Address of the trampoline is returned in RAX.
	 *   - Non zero RDX means trampoline needs to enable 5-level
	 *     paging.
	 *
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	pushq	%rsi
	movq	%rsi, %rdi		/* real mode address */
	call	paging_prepare
	popq	%rsi

	/* Save the trampoline address in RCX */
	movq	%rax, %rcx

	/*
	 * Load the address of trampoline_return() into RDI.
	 * It will be used by the trampoline to return to the main code.
	 */
	leaq	trampoline_return(%rip), %rdi

	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	pushq	$__KERNEL32_CS
	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
	pushq	%rax
	lretq
trampoline_return:
	/* Restore the stack, the 32-bit trampoline uses its own stack */
	leaq	boot_stack_end(%rbx), %rsp

	/*
	 * cleanup_trampoline() would restore trampoline memory.
	 *
	 * RDI is address of the page table to use instead of page table
	 * in trampoline memory (if required).
	 *
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	pushq	%rsi
	leaq	top_pgtable(%rbx), %rdi
	call	cleanup_trampoline
	popq	%rsi

	/* Zero EFLAGS */
	pushq	$0
	popfq

	/*
	 * Previously we've adjusted the GOT with address the binary was
	 * loaded at. Now we need to re-adjust for relocation address.
	 *
	 * Calculate the address the binary is loaded at, so that we can
	 * undo the previous GOT adjustment.
	 */
	call	1f
1:	popq	%rax
	subq	$1b, %rax

	/* The new adjustment is the relocation address */
	movq	%rbx, %rdi
	call	.Ladjust_got

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 *
 * The copy runs backwards (std) from the last quadword below _bss.
 */
	pushq	%rsi
	leaq	(_bss-8)(%rip), %rsi
	leaq	(_bss-8)(%rbx), %rdi
	movq	$_bss /* - $startup_32 */, %rcx
	shrq	$3, %rcx
	std
	rep	movsq
	cld
	popq	%rsi

/*
 * Jump to the relocated address.
 */
	leaq	.Lrelocated(%rbx), %rax
	jmp	*%rax
SYM_CODE_END(startup_64)

#ifdef CONFIG_EFI_STUB

/* The entry point for the PE/COFF executable is efi_pe_entry. */
SYM_FUNC_START(efi_pe_entry)
	movq	%rcx, efi64_config(%rip)	/* Handle */
	movq	%rdx, efi64_config+8(%rip) /* EFI System table pointer */

	leaq	efi64_config(%rip), %rax
	movq	%rax, efi_config(%rip)

	/* %rbp = delta between link-time and actual load address */
	call	1f
1:	popq	%rbp
	subq	$1b, %rbp

	/*
	 * Relocate efi_config->call().
	 */
	addq	%rbp, efi64_config+40(%rip)

	movq	%rax, %rdi
	call	make_boot_params
	cmpq	$0,%rax
	je	fail
	mov	%rax, %rsi
	leaq	startup_32(%rip), %rax
	movl	%eax, BP_code32_start(%rsi)
	jmp	2f		/* Skip the relocation */

handover_entry:
	/* %rbp = delta between link-time and actual load address */
	call	1f
1:	popq	%rbp
	subq	$1b, %rbp

	/*
	 * Relocate efi_config->call().
	 */
	movq	efi_config(%rip), %rax
	addq	%rbp, 40(%rax)
2:
	movq	efi_config(%rip), %rdi
	call	efi_main
	movq	%rax,%rsi
	cmpq	$0,%rax
	jne	2f
fail:
	/* EFI init failed, so hang. */
	hlt
	jmp	fail
2:
	/*
	 * Use the full 64-bit pointer returned by efi_main(); addressing
	 * through %esi would silently truncate it.
	 */
	movl	BP_code32_start(%rsi), %eax
	leaq	startup_64(%rax), %rax
	jmp	*%rax
SYM_FUNC_END(efi_pe_entry)

/*
 * efi64_stub_entry - 64-bit EFI entry, placed at fixed offset 0x390.
 *
 * In:	%rdi = handle, %rsi = EFI system table pointer, %rdx = value moved
 *	into %rsi before joining handover_entry.
 */
	.org 0x390
SYM_FUNC_START(efi64_stub_entry)
	movq	%rdi, efi64_config(%rip)	/* Handle */
	movq	%rsi, efi64_config+8(%rip) /* EFI System table pointer */

	leaq	efi64_config(%rip), %rax
	movq	%rax, efi_config(%rip)

	movq	%rdx, %rsi
	jmp	handover_entry
SYM_FUNC_END(efi64_stub_entry)
#endif

/*
 * .Lrelocated - runs from the relocated copy of the image.
 *
 * In:	%rsi = real-mode data pointer, %rbp = output address for the
 *	decompressed kernel.
 *
 * Clears .bss, calls extract_kernel() and jumps to the address it returns.
 */
	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Clear BSS (stack is currently empty)
 */
	xorl	%eax, %eax
	leaq    _bss(%rip), %rdi
	leaq    _ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

/*
 * Do the extraction, and jump to the new kernel..
 */
	pushq	%rsi			/* Save the real mode argument */
	movq	%rsi, %rdi		/* real mode address */
	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
	leaq	input_data(%rip), %rdx  /* input_data */
	movl	$z_input_len, %ecx	/* input_len */
	movq	%rbp, %r8		/* output target address */
	movq	$z_output_len, %r9	/* decompressed length, end of relocs */
	call	extract_kernel		/* returns kernel location in %rax */
	popq	%rsi

/*
 * Jump to the decompressed kernel.
 */
	jmp	*%rax
SYM_FUNC_END(.Lrelocated)

/*
 * Adjust the global offset table
 *
 * RAX is the previous adjustment of the table to undo (use 0 if it's the
 * first time we touch GOT).
 * RDI is the new adjustment to apply.
 *
 * Uses RCX and RDX as scratch and modifies the flags.
 */
.Ladjust_got:
	/* Walk through the GOT adding the address to the entries */
	leaq	_got(%rip), %rdx
	leaq	_egot(%rip), %rcx
1:
	cmpq	%rcx, %rdx
	jae	2f
	subq	%rax, (%rdx)	/* Undo previous adjustment */
	addq	%rdi, (%rdx)	/* Apply the new adjustment */
	addq	$8, %rdx
	jmp	1b
2:
	ret

	.code32
/*
 * This is the 32-bit trampoline that will be copied over to low memory.
 *
 * RDI contains the return address (might be above 4G).
 * ECX contains the base address of the trampoline memory.
 * Non zero RDX means trampoline needs to enable 5-level paging.
 */
SYM_CODE_START(trampoline_32bit_src)
	/* Set up data and stack segments */
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %ss

	/* Set up new stack */
	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp

	/* Disable paging */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/* Check what paging mode we want to be in after the trampoline */
	cmpl	$0, %edx
	jz	1f

	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jnz	3f
	jmp	2f
1:
	/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jz	3f
2:
	/* Point CR3 to the trampoline's new top level page table */
	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
	movl	%eax, %cr3
3:
	/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
	pushl	%ecx			/* rdmsr/wrmsr use %ecx and %edx */
	pushl	%edx
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr
	popl	%edx
	popl	%ecx

	/* Enable PAE and LA57 (if required) paging modes */
	movl	$X86_CR4_PAE, %eax
	cmpl	$0, %edx
	jz	1f
	orl	$X86_CR4_LA57, %eax
1:
	movl	%eax, %cr4

	/* Calculate address of paging_enabled() once we are executing in the trampoline */
	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax

	/* Prepare the stack for far return to Long Mode */
	pushl	$__KERNEL_CS
	pushl	%eax

	/* Enable paging again */
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movl	%eax, %cr0

	lret
SYM_CODE_END(trampoline_32bit_src)

	.code64
SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
	/* Return from the trampoline */
	jmp	*%rdi
SYM_FUNC_END(.Lpaging_enabled)

	/*
         * The trampoline code has a size limit.
         * Make sure we fail to compile if the trampoline code grows
         * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

	.code32
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
	hlt
	jmp     1b
SYM_FUNC_END(.Lno_longmode)

#include "../../kernel/verify_cpu.S"

	.data
/*
 * 64-bit GDT descriptor: limit followed by a 64-bit base, which startup_64
 * fills in at run time before executing lgdt.
 */
SYM_DATA_START_LOCAL(gdt64)
	.word	gdt_end - gdt
	.quad   0
SYM_DATA_END(gdt64)
	.balign	8
/*
 * The GDT itself.  Its first 8 bytes double as the 32-bit descriptor
 * (limit, base, padding) that startup_32 relocates and loads with lgdt.
 */
SYM_DATA_START_LOCAL(gdt)
	.word	gdt_end - gdt
	.long	gdt
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad   0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

#ifdef CONFIG_EFI_STUB
/* Pointer to whichever of efi32_config / efi64_config is in use */
SYM_DATA_LOCAL(efi_config, .quad 0)

#ifdef CONFIG_EFI_MIXED
SYM_DATA_START(efi32_config)
	.fill	5,8,0
	.quad	efi64_thunk
	.byte	0
SYM_DATA_END(efi32_config)
#endif

SYM_DATA_START(efi64_config)
	.fill	5,8,0
	.quad	efi_call
	.byte	1
SYM_DATA_END(efi64_config)
#endif /* CONFIG_EFI_STUB */

/*
 * Stack and heap for uncompression
 */
	.bss
	.balign 4
SYM_DATA_LOCAL(boot_heap,	.fill BOOT_HEAP_SIZE, 1, 0)

SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

/*
 * Space for page tables (not in .bss so not zeroed)
 */
	.section ".pgtable","a",@nobits
	.balign 4096
SYM_DATA_LOCAL(pgtable,		.fill BOOT_PGT_SIZE, 1, 0)

/*
 * The page table is going to be used instead of page table in the trampoline
 * memory.
 */
SYM_DATA_LOCAL(top_pgtable,	.fill PAGE_SIZE, 1, 0)