/*
 *  prepare to run common code
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 */

#define DISABLE_BRANCH_PROFILING
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/mem_encrypt.h>

#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820/api.h>
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>

/*
 * Manage page tables very early on.
 */
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

#define __head	__section(.head.text)

static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
	return ptr - (void *)_text + (void *)physaddr;
}

unsigned long __head __startup_64(unsigned long physaddr,
				  struct boot_params *bp)
{
	unsigned long load_delta, *p;
	unsigned long pgtable_flags;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	int i;
	unsigned int *next_pgt_ptr;

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_PAGE_MASK)
		for (;;);

	/* Activate Secure Memory Encryption (SME) if supported and enabled */
	sme_enable(bp);

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = fixup_pointer(&early_top_pgt, physaddr);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;

	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
	pmd[506] += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */
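
	/*
	 * The identity-map PUD and PMD pages below come from the
	 * early_dynamic_pgts pool reserved in head_64.S; next_early_pgt
	 * tracks how many of those pages have been handed out so far.
	 */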

	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
	pud[i + 1] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
		pmd[idx] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;
	}

	/*
	 * Fixup phys_base - remove the memory encryption mask to obtain
	 * the true physical address.
	 */
	p = fixup_pointer(&phys_base, physaddr);
	*p += load_delta - sme_get_me_mask();

	/* Encrypt the kernel (if SME is active) */
	sme_encrypt_kernel();

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

unsigned long __startup_secondary_64(void)
{
	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
	memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
	next_early_pgt = 0;
	write_cr3(__sme_pa_nodebug(early_top_pgt));
}

/* Create a new PMD entry */
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pgdval_t pgd, *pgd_p;
	p4dval_t p4d, *p4d_p;
	pudval_t pud, *pud_p;
	pmdval_t *pmd_p;

	/* Invalid address or early pgt is done ? */
	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
		return -1;

again:
	pgd_p = &early_top_pgt[pgd_index(address)].pgd;
	pgd = *pgd_p;

	/*
	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
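	/*
	 * Walk the p4d/pud/pmd levels, allocating any missing intermediate
	 * table from the early_dynamic_pgts pool.  If the pool is exhausted,
	 * wipe the early page tables and retry from scratch.
	 */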
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		p4d_p = pgd_p;
	else if (pgd)
		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	p4d_p += p4d_index(address);
	p4d = *p4d_p;

	if (p4d)
		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;

	if (pud)
		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pmd_p[pmd_index(address)] = pmd;

	return 0;
}

int __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pmdval_t pmd;

	pmd = (physaddr & PMD_MASK) + early_pmd_flags;

	return __early_make_pgtable(address, pmd);
}

/* Don't add a printk in there. printk relies on the PDA which is not initialized
   yet. */
static void __init clear_bss(void)
{
	memset(__bss_start, 0,
	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
}

static unsigned long get_cmd_line_ptr(void)
{
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;

	return cmd_line_ptr;
}

static void __init copy_bootdata(char *real_mode_data)
{
	char * command_line;
	unsigned long cmd_line_ptr;

	/*
	 * If SME is active, this will create decrypted mappings of the
	 * boot data in advance of the copy operations.
	 */
	sme_map_bootdata(real_mode_data);

	memcpy(&boot_params, real_mode_data, sizeof boot_params);
	sanitize_boot_params(&boot_params);
	cmd_line_ptr = get_cmd_line_ptr();
	if (cmd_line_ptr) {
		command_line = __va(cmd_line_ptr);
		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
	}

	/*
	 * The old boot data is no longer needed and won't be reserved,
	 * freeing up that memory for use by the system.  If SME is active,
	 * we need to remove the mappings that were created so that the
	 * memory doesn't remain mapped as decrypted.
	 */
	sme_unmap_bootdata(real_mode_data);
}

asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
				(__START_KERNEL & PGDIR_MASK)));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	cr4_init_shadow();

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	clear_bss();

	clear_page(init_top_pgt);

	/*
	 * SME support may update early_pmd_flags to include the memory
	 * encryption mask, so it needs to be called before anything
	 * that may generate a page fault.
	 */
	sme_early_init();

	kasan_early_init();

	idt_setup_early_handler();

	copy_bootdata(__va(real_mode_data));

	/*
	 * Load microcode early on BSP.
	 */
	load_ucode_bsp();

	/* set init_top_pgt kernel high mapping */
	init_top_pgt[511] = early_top_pgt[511];

	x86_64_start_reservations(real_mode_data);
}

void __init x86_64_start_reservations(char *real_mode_data)
{
	/* version is always not zero if it is copied */
	if (!boot_params.hdr.version)
		copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:
		x86_intel_mid_early_setup();
		break;
	default:
		break;
	}

	start_kernel();
}