1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * This file implements KASLR memory randomization for x86_64. It randomizes 4 * the virtual address space of kernel memory regions (physical memory 5 * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates 6 * exploits relying on predictable kernel addresses. 7 * 8 * Entropy is generated using the KASLR early boot functions now shared in 9 * the lib directory (originally written by Kees Cook). Randomization is 10 * done on PGD & P4D/PUD page table levels to increase possible addresses. 11 * The physical memory mapping code was adapted to support P4D/PUD level 12 * virtual addresses. This implementation on the best configuration provides 13 * 30,000 possible virtual addresses in average for each memory region. 14 * An additional low memory page is used to ensure each CPU can start with 15 * a PGD aligned virtual address (for realmode). 16 * 17 * The order of each memory region is not changed. The feature looks at 18 * the available space for the regions based on different configuration 19 * options and randomizes the base and space between each. The size of the 20 * physical memory mapping is the available physical memory. 21 */ 22 23 #include <linux/kernel.h> 24 #include <linux/init.h> 25 #include <linux/random.h> 26 27 #include <asm/pgalloc.h> 28 #include <asm/pgtable.h> 29 #include <asm/setup.h> 30 #include <asm/kaslr.h> 31 32 #include "mm_internal.h" 33 34 #define TB_SHIFT 40 35 36 /* 37 * Virtual address start and end range for randomization. 38 * 39 * The end address could depend on more configuration options to make the 40 * highest amount of space for randomization available, but that's too hard 41 * to keep straight and caused issues already. 42 */ 43 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; 44 static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; 45 46 /* Default values */ 47 unsigned long page_offset_base = __PAGE_OFFSET_BASE; 48 EXPORT_SYMBOL(page_offset_base); 49 unsigned long vmalloc_base = __VMALLOC_BASE; 50 EXPORT_SYMBOL(vmalloc_base); 51 unsigned long vmemmap_base = __VMEMMAP_BASE; 52 EXPORT_SYMBOL(vmemmap_base); 53 54 /* 55 * Memory regions randomized by KASLR (except modules that use a separate logic 56 * earlier during boot). The list is ordered based on virtual addresses. This 57 * order is kept after randomization. 58 */ 59 static __initdata struct kaslr_memory_region { 60 unsigned long *base; 61 unsigned long size_tb; 62 } kaslr_regions[] = { 63 { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, 64 { &vmalloc_base, VMALLOC_SIZE_TB }, 65 { &vmemmap_base, 1 }, 66 }; 67 68 /* Get size in bytes used by the memory region */ 69 static inline unsigned long get_padding(struct kaslr_memory_region *region) 70 { 71 return (region->size_tb << TB_SHIFT); 72 } 73 74 /* 75 * Apply no randomization if KASLR was disabled at boot or if KASAN 76 * is enabled. KASAN shadow mappings rely on regions being PGD aligned. 77 */ 78 static inline bool kaslr_memory_enabled(void) 79 { 80 return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); 81 } 82 83 /* Initialize base and padding for each memory region randomized with KASLR */ 84 void __init kernel_randomize_memory(void) 85 { 86 size_t i; 87 unsigned long vaddr = vaddr_start; 88 unsigned long rand, memory_tb; 89 struct rnd_state rand_state; 90 unsigned long remain_entropy; 91 92 /* 93 * These BUILD_BUG_ON checks ensure the memory layout is consistent 94 * with the vaddr_start/vaddr_end variables. These checks are very 95 * limited.... 96 */ 97 BUILD_BUG_ON(vaddr_start >= vaddr_end); 98 BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); 99 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); 100 101 if (!kaslr_memory_enabled()) 102 return; 103 104 /* 105 * Update Physical memory mapping to available and 106 * add padding if needed (especially for memory hotplug support). 107 */ 108 BUG_ON(kaslr_regions[0].base != &page_offset_base); 109 memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) + 110 CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; 111 112 /* Adapt phyiscal memory region size based on available memory */ 113 if (memory_tb < kaslr_regions[0].size_tb) 114 kaslr_regions[0].size_tb = memory_tb; 115 116 /* Calculate entropy available between regions */ 117 remain_entropy = vaddr_end - vaddr_start; 118 for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) 119 remain_entropy -= get_padding(&kaslr_regions[i]); 120 121 prandom_seed_state(&rand_state, kaslr_get_random_long("Memory")); 122 123 for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) { 124 unsigned long entropy; 125 126 /* 127 * Select a random virtual address using the extra entropy 128 * available. 129 */ 130 entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); 131 prandom_bytes_state(&rand_state, &rand, sizeof(rand)); 132 if (IS_ENABLED(CONFIG_X86_5LEVEL)) 133 entropy = (rand % (entropy + 1)) & P4D_MASK; 134 else 135 entropy = (rand % (entropy + 1)) & PUD_MASK; 136 vaddr += entropy; 137 *kaslr_regions[i].base = vaddr; 138 139 /* 140 * Jump the region and add a minimum padding based on 141 * randomization alignment. 142 */ 143 vaddr += get_padding(&kaslr_regions[i]); 144 if (IS_ENABLED(CONFIG_X86_5LEVEL)) 145 vaddr = round_up(vaddr + 1, P4D_SIZE); 146 else 147 vaddr = round_up(vaddr + 1, PUD_SIZE); 148 remain_entropy -= entropy; 149 } 150 } 151 152 static void __meminit init_trampoline_pud(void) 153 { 154 unsigned long paddr, paddr_next; 155 pgd_t *pgd; 156 pud_t *pud_page, *pud_page_tramp; 157 int i; 158 159 pud_page_tramp = alloc_low_page(); 160 161 paddr = 0; 162 pgd = pgd_offset_k((unsigned long)__va(paddr)); 163 pud_page = (pud_t *) pgd_page_vaddr(*pgd); 164 165 for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { 166 pud_t *pud, *pud_tramp; 167 unsigned long vaddr = (unsigned long)__va(paddr); 168 169 pud_tramp = pud_page_tramp + pud_index(paddr); 170 pud = pud_page + pud_index(vaddr); 171 paddr_next = (paddr & PUD_MASK) + PUD_SIZE; 172 173 *pud_tramp = *pud; 174 } 175 176 set_pgd(&trampoline_pgd_entry, 177 __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); 178 } 179 180 static void __meminit init_trampoline_p4d(void) 181 { 182 unsigned long paddr, paddr_next; 183 pgd_t *pgd; 184 p4d_t *p4d_page, *p4d_page_tramp; 185 int i; 186 187 p4d_page_tramp = alloc_low_page(); 188 189 paddr = 0; 190 pgd = pgd_offset_k((unsigned long)__va(paddr)); 191 p4d_page = (p4d_t *) pgd_page_vaddr(*pgd); 192 193 for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) { 194 p4d_t *p4d, *p4d_tramp; 195 unsigned long vaddr = (unsigned long)__va(paddr); 196 197 p4d_tramp = p4d_page_tramp + p4d_index(paddr); 198 p4d = p4d_page + p4d_index(vaddr); 199 paddr_next = (paddr & P4D_MASK) + P4D_SIZE; 200 201 *p4d_tramp = *p4d; 202 } 203 204 set_pgd(&trampoline_pgd_entry, 205 __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); 206 } 207 208 /* 209 * Create PGD aligned trampoline table to allow real mode initialization 210 * of additional CPUs. Consume only 1 low memory page. 211 */ 212 void __meminit init_trampoline(void) 213 { 214 215 if (!kaslr_memory_enabled()) { 216 init_trampoline_default(); 217 return; 218 } 219 220 if (IS_ENABLED(CONFIG_X86_5LEVEL)) 221 init_trampoline_p4d(); 222 else 223 init_trampoline_pud(); 224 } 225