/*
 * x86_64 boot and support code
 *
 * Copyright 2019 Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 3 or later.
 * See the COPYING file in the top-level directory.
 *
 * Unlike the i386 version we instead use Xen's PVHVM booting header
 * which should drop us automatically into 32 bit mode ready to go. I've
 * nabbed bits of the Linux kernel setup to achieve this.
 *
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 */

        .section .head

/*
 * ELF note helpers.
 *
 * ELFNOTE_START opens a ".note.<name>" section and emits the note header
 * (namesz, descsz, type) followed by the NUL-terminated name.  The
 * descriptor is whatever is assembled between ELFNOTE_START and
 * ELFNOTE_END; ELFNOTE_END pads it to 4 bytes and restores the previous
 * section.  Local labels 1/2/3 and 4484 delimit the name and the
 * descriptor so that both sizes are computed by the assembler.
 */
#define ELFNOTE_START(name, type, flags)        \
.pushsection .note.name, flags,@note    ;       \
  .balign 4                             ;       \
  .long 2f - 1f         /* namesz */    ;       \
  .long 4484f - 3f      /* descsz */    ;       \
  .long type                            ;       \
1:.asciz #name                          ;       \
2:.balign 4                             ;       \
3:

#define ELFNOTE_END                             \
4484:.balign 4                          ;       \
.popsection                             ;

#define ELFNOTE(name, type, desc)               \
        ELFNOTE_START(name, type, "")           \
                desc                    ;       \
        ELFNOTE_END

/* Xen ELF note types used (or available) below. */
#define XEN_ELFNOTE_ENTRY          1
#define XEN_ELFNOTE_HYPERCALL_PAGE 2
#define XEN_ELFNOTE_VIRT_BASE      3
#define XEN_ELFNOTE_PADDR_OFFSET   4
#define XEN_ELFNOTE_PHYS32_ENTRY  18

/*
 * __ASM_SEL(a, b) always picks its second argument here, so _ASM_PTR
 * expands to ".quad".
 */
#define __ASM_FORM(x)           x
#define __ASM_FORM_RAW(x)       x
#define __ASM_FORM_COMMA(x)     x,
#define __ASM_SEL(a,b)          __ASM_FORM(b)
#define __ASM_SEL_RAW(a,b)      __ASM_FORM_RAW(b)
#define _ASM_PTR                __ASM_SEL(.long, .quad)

        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR 0x100000)
        ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR _start)
        ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   _ASM_PTR _start)    /* entry == virtbase */
        ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)

/*
 * Named constants for the boot path.
 */
#define BOOT_CS                 0x8     /* selector of the code descriptor in gdt/gdt64 */
#define BOOT_DS                 0x10    /* selector of the data descriptor in gdt/gdt64 */

#define CR4_PAE_BIT             5       /* cr4: physical address extension */
#define CR0_PG_BIT              31      /* cr0: paging enable */
#define MSR_EFER                0xc0000080 /* extended feature register */
#define EFER_LME_BIT            8       /* EFER: long mode enable */

#define DEBUG_OUT_PORT          0xE9    /* character output port used by __sys_outc */
#define ISA_DEBUG_EXIT_PORT     0xf4    /* isa-debug-exit device */
#define QEMU_ACPI_POWEROFF_PORT 0x604   /* QEMU ACPI poweroff: port ... */
#define QEMU_ACPI_POWEROFF_VAL  0x2000  /* ... and the value written to it */

        /*
         * Entry point for PVH guests.
         *
         * Xen ABI specifies the following register state when we come here:
         *
         * - `ebx`: contains the physical memory address where the loader has placed
         *          the boot start info structure.
         * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
         * - `cr4`: all bits are cleared.
         * - `cs `: must be a 32-bit read/execute code segment with a base of '0'
         *          and a limit of '0xFFFFFFFF'. The selector value is unspecified.
         * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
         *               '0' and a limit of '0xFFFFFFFF'. The selector values are all
         *               unspecified.
         * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
         *         of '0x67'.
         * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
         *             Bit 8 (TF) must be cleared. Other bits are all unspecified.
         *
         * All other processor registers and flag bits are unspecified. The OS is in
         * charge of setting up its own stack, GDT and IDT.
         */
        .code32
        .section .text

.global _start
_start:
        cld
        lgdt    gdtr

        /* Far jump to reload %cs from our own GDT. */
        ljmp    $BOOT_CS, $.Lloadcs
.Lloadcs:
        /* Point every data segment register at our flat data descriptor. */
        mov     $BOOT_DS, %eax
        mov     %eax, %ds
        mov     %eax, %es
        mov     %eax, %fs
        mov     %eax, %gs
        mov     %eax, %ss

        /* Enable PAE mode (bit 5). */
        mov     %cr4, %eax
        btsl    $CR4_PAE_BIT, %eax
        mov     %eax, %cr4

        /* Enable Long mode. */
        mov     $MSR_EFER, %ecx
        rdmsr
        btsl    $EFER_LME_BIT, %eax
        wrmsr

        /* Enable paging: load the top-level table, then set cr0 bit 31. */
        mov     $.Lpml4, %ecx
        mov     %ecx, %cr3

        mov     %cr0, %eax
        btsl    $CR0_PG_BIT, %eax
        mov     %eax, %cr0

        /* Jump to 64-bit mode. */
        lgdt    gdtr64
        ljmp    $BOOT_CS, $.Lenter64

        .code64
        .section .text
.Lenter64:

        /* Set up the stack ASAP; stack_end is the top of the .bss stack. */
        movq    $stack_end, %rsp

        /* don't worry about stack frame, assume everything is garbage when we return */
        call    main

        /*
         * Output any non-zero result in eax to the isa-debug-exit device.
         * The whole of %eax is tested so that a return value whose low
         * byte happens to be zero (e.g. 0x100) is still reported as a
         * failure.
         */
        test    %eax, %eax
        jz      1f
        out     %ax, $ISA_DEBUG_EXIT_PORT

1:      /* QEMU ACPI poweroff */
        mov     $QEMU_ACPI_POWEROFF_PORT, %edx
        mov     $QEMU_ACPI_POWEROFF_VAL, %eax
        out     %ax, %dx
        hlt
        jmp     1b

        /*
         * Helper Functions
         *
         * x86_64 calling convention is rdi, rsi, rdx, rcx, r8, r9
         */

        /*
         * __sys_outc: output a single character to port 0xE9.
         *
         * In:    %rdi (low byte) = character to output, per the calling
         *        convention above.
         * Out:   nothing; %rax is preserved across the call.
         *
         * The character must be moved into %al because the port form of
         * "out" only takes its data from the accumulator.
         */
        .global __sys_outc
__sys_outc:
        pushq   %rax
        mov     %rdi, %rax
        out     %al, $DEBUG_OUT_PORT
        popq    %rax
        ret

        /*
         * Interrupt Descriptor Table
         *
         * 32 zero-filled 8-byte slots.  Nothing in this file loads the
         * table (there is no lidt here).
         */

        .section .data
        .align 16

idt_00: .int 0, 0
idt_01: .int 0, 0
idt_02: .int 0, 0
idt_03: .int 0, 0
idt_04: .int 0, 0
idt_05: .int 0, 0
idt_06: .int 0, 0 /* intr_6_opcode, Invalid Opcode */
idt_07: .int 0, 0
idt_08: .int 0, 0
idt_09: .int 0, 0
idt_0A: .int 0, 0
idt_0B: .int 0, 0
idt_0C: .int 0, 0
idt_0D: .int 0, 0
idt_0E: .int 0, 0
idt_0F: .int 0, 0
idt_10: .int 0, 0
idt_11: .int 0, 0
idt_12: .int 0, 0
idt_13: .int 0, 0
idt_14: .int 0, 0
idt_15: .int 0, 0
idt_16: .int 0, 0
idt_17: .int 0, 0
idt_18: .int 0, 0
idt_19: .int 0, 0
idt_1A: .int 0, 0
idt_1B: .int 0, 0
idt_1C: .int 0, 0
idt_1D: .int 0, 0
idt_1E: .int 0, 0
idt_1F: .int 0, 0


        /*
         * Global Descriptor Table (GDT)
         *
         * This describes various memory areas (segments) through
         * segment descriptors. In 32 bit mode each segment is
         * associated with segment registers which are implicitly
         * (or explicitly) referenced depending on the instruction.
         * However in 64 bit mode selectors are flat and segmented
         * addressing isn't used.
         *
         * Layout trick: the first 8-byte slot of each table (the null
         * descriptor, never used for addressing) holds 2 bytes of
         * padding followed by the 6-byte pseudo-descriptor (limit, base)
         * that lgdt reads.  The code descriptor therefore sits at
         * offset 0x8 and the data descriptor at offset 0x10.
         */
gdt:
        .short 0
gdtr:
        .short gdt_en - gdt - 1         /* limit */
        .int gdt                        /* base */

        // Code cs:
        .short 0xFFFF
        .short 0
        .byte 0
        .byte 0x9b
        .byte 0xCF
        .byte 0

        // Data ds:, ss:, es:, fs:, and gs:
        .short 0xFFFF
        .short 0
        .byte 0
        .byte 0x93
        .byte 0xCF
        .byte 0
gdt_en:

        /* GDT loaded just before the far jump into 64-bit code. */
gdt64:
        .short 0
gdtr64:
        .short gdt64_en - gdt64 - 1     /* limit */
        .int gdt64                      /* base */

        // Code (flags byte 0xAF rather than 0xCF: 64-bit code segment)
        .short 0xFFFF
        .short 0
        .byte 0
        .byte 0x9b
        .byte 0xAF
        .byte 0

        // Data
        .short 0xFFFF
        .short 0
        .byte 0
        .byte 0x93
        .byte 0xCF
        .byte 0
gdt64_en:

        .section .bss
        .align 16

        /* 64 KiB boot stack; %rsp starts at stack_end. */
stack:  .space 65536
stack_end:

        .section .data

        /*
         * Page tables: map the first 4 GB with 2 MB pages, each entry
         * pointing at physical address i << 21.
         *
         * .Lpd holds 4 consecutive 4 KB page directories of 512 entries
         * each, one per GB.
         */
.align 4096
.Lpd:
i = 0
        .rept 512 * 4
        .quad 0x1e7 | (i << 21)
        i = i + 1
        .endr

.align 4096
.Lpdp:
        .quad .Lpd + 7 + 0 * 4096 /* 0-1 GB */
        .quad .Lpd + 7 + 1 * 4096 /* 1-2 GB */
        .quad .Lpd + 7 + 2 * 4096 /* 2-3 GB */
        .quad .Lpd + 7 + 3 * 4096 /* 3-4 GB */

.align 4096
.Lpml4:
        .quad .Lpdp + 7 /* 0-512 GB */