1 /* 2 * kaslr.c 3 * 4 * This contains the routines needed to generate a reasonable level of 5 * entropy to choose a randomized kernel base address offset in support 6 * of Kernel Address Space Layout Randomization (KASLR). Additionally 7 * handles walking the physical memory maps (and tracking memory regions 8 * to avoid) in order to select a physical memory location that can 9 * contain the entire properly aligned running kernel image. 10 * 11 */ 12 #include "misc.h" 13 #include "error.h" 14 15 #include <generated/compile.h> 16 #include <linux/module.h> 17 #include <linux/uts.h> 18 #include <linux/utsname.h> 19 #include <generated/utsrelease.h> 20 21 /* Simplified build-specific string for starting entropy. */ 22 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" 23 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; 24 25 static unsigned long rotate_xor(unsigned long hash, const void *area, 26 size_t size) 27 { 28 size_t i; 29 unsigned long *ptr = (unsigned long *)area; 30 31 for (i = 0; i < size / sizeof(hash); i++) { 32 /* Rotate by odd number of bits and XOR. */ 33 hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); 34 hash ^= ptr[i]; 35 } 36 37 return hash; 38 } 39 40 /* Attempt to create a simple but unpredictable starting entropy. */ 41 static unsigned long get_boot_seed(void) 42 { 43 unsigned long hash = 0; 44 45 hash = rotate_xor(hash, build_str, sizeof(build_str)); 46 hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); 47 48 return hash; 49 } 50 51 #define KASLR_COMPRESSED_BOOT 52 #include "../../lib/kaslr.c" 53 54 struct mem_vector { 55 unsigned long start; 56 unsigned long size; 57 }; 58 59 enum mem_avoid_index { 60 MEM_AVOID_ZO_RANGE = 0, 61 MEM_AVOID_INITRD, 62 MEM_AVOID_CMDLINE, 63 MEM_AVOID_BOOTPARAMS, 64 MEM_AVOID_MAX, 65 }; 66 67 static struct mem_vector mem_avoid[MEM_AVOID_MAX]; 68 69 static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) 70 { 71 /* Item one is entirely before item two. */ 72 if (one->start + one->size <= two->start) 73 return false; 74 /* Item one is entirely after item two. */ 75 if (one->start >= two->start + two->size) 76 return false; 77 return true; 78 } 79 80 /* 81 * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). 82 * The mem_avoid array is used to store the ranges that need to be avoided 83 * when KASLR searches for an appropriate random address. We must avoid any 84 * regions that are unsafe to overlap with during decompression, and other 85 * things like the initrd, cmdline and boot_params. This comment seeks to 86 * explain mem_avoid as clearly as possible since incorrect mem_avoid 87 * memory ranges lead to really hard to debug boot failures. 88 * 89 * The initrd, cmdline, and boot_params are trivial to identify for 90 * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and 91 * MEM_AVOID_BOOTPARAMS respectively below. 92 * 93 * What is not obvious how to avoid is the range of memory that is used 94 * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover 95 * the compressed kernel (ZO) and its run space, which is used to extract 96 * the uncompressed kernel (VO) and relocs. 97 * 98 * ZO's full run size sits against the end of the decompression buffer, so 99 * we can calculate where text, data, bss, etc of ZO are positioned more 100 * easily. 101 * 102 * For additional background, the decompression calculations can be found 103 * in header.S, and the memory diagram is based on the one found in misc.c. 104 * 105 * The following conditions are already enforced by the image layouts and 106 * associated code: 107 * - input + input_size >= output + output_size 108 * - kernel_total_size <= init_size 109 * - kernel_total_size <= output_size (see Note below) 110 * - output + init_size >= output + output_size 111 * 112 * (Note that kernel_total_size and output_size have no fundamental 113 * relationship, but output_size is passed to choose_random_location 114 * as a maximum of the two. The diagram is showing a case where 115 * kernel_total_size is larger than output_size, but this case is 116 * handled by bumping output_size.) 117 * 118 * The above conditions can be illustrated by a diagram: 119 * 120 * 0 output input input+input_size output+init_size 121 * | | | | | 122 * | | | | | 123 * |-----|--------|--------|--------------|-----------|--|-------------| 124 * | | | 125 * | | | 126 * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size 127 * 128 * [output, output+init_size) is the entire memory range used for 129 * extracting the compressed image. 130 * 131 * [output, output+kernel_total_size) is the range needed for the 132 * uncompressed kernel (VO) and its run size (bss, brk, etc). 133 * 134 * [output, output+output_size) is VO plus relocs (i.e. the entire 135 * uncompressed payload contained by ZO). This is the area of the buffer 136 * written to during decompression. 137 * 138 * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case 139 * range of the copied ZO and decompression code. (i.e. the range 140 * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) 141 * 142 * [input, input+input_size) is the original copied compressed image (ZO) 143 * (i.e. it does not include its run size). This range must be avoided 144 * because it contains the data used for decompression. 145 * 146 * [input+input_size, output+init_size) is [_text, _end) for ZO. This 147 * range includes ZO's heap and stack, and must be avoided since it 148 * performs the decompression. 149 * 150 * Since the above two ranges need to be avoided and they are adjacent, 151 * they can be merged, resulting in: [input, output+init_size) which 152 * becomes the MEM_AVOID_ZO_RANGE below. 153 */ 154 static void mem_avoid_init(unsigned long input, unsigned long input_size, 155 unsigned long output) 156 { 157 unsigned long init_size = boot_params->hdr.init_size; 158 u64 initrd_start, initrd_size; 159 u64 cmd_line, cmd_line_size; 160 char *ptr; 161 162 /* 163 * Avoid the region that is unsafe to overlap during 164 * decompression. 165 */ 166 mem_avoid[MEM_AVOID_ZO_RANGE].start = input; 167 mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; 168 add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, 169 mem_avoid[MEM_AVOID_ZO_RANGE].size); 170 171 /* Avoid initrd. */ 172 initrd_start = (u64)boot_params->ext_ramdisk_image << 32; 173 initrd_start |= boot_params->hdr.ramdisk_image; 174 initrd_size = (u64)boot_params->ext_ramdisk_size << 32; 175 initrd_size |= boot_params->hdr.ramdisk_size; 176 mem_avoid[MEM_AVOID_INITRD].start = initrd_start; 177 mem_avoid[MEM_AVOID_INITRD].size = initrd_size; 178 /* No need to set mapping for initrd, it will be handled in VO. */ 179 180 /* Avoid kernel command line. */ 181 cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; 182 cmd_line |= boot_params->hdr.cmd_line_ptr; 183 /* Calculate size of cmd_line. */ 184 ptr = (char *)(unsigned long)cmd_line; 185 for (cmd_line_size = 0; ptr[cmd_line_size++]; ) 186 ; 187 mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; 188 mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; 189 add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, 190 mem_avoid[MEM_AVOID_CMDLINE].size); 191 192 /* Avoid boot parameters. */ 193 mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; 194 mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); 195 add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, 196 mem_avoid[MEM_AVOID_BOOTPARAMS].size); 197 198 /* We don't need to set a mapping for setup_data. */ 199 200 #ifdef CONFIG_X86_VERBOSE_BOOTUP 201 /* Make sure video RAM can be used. */ 202 add_identity_map(0, PMD_SIZE); 203 #endif 204 } 205 206 /* 207 * Does this memory vector overlap a known avoided area? If so, record the 208 * overlap region with the lowest address. 209 */ 210 static bool mem_avoid_overlap(struct mem_vector *img, 211 struct mem_vector *overlap) 212 { 213 int i; 214 struct setup_data *ptr; 215 unsigned long earliest = img->start + img->size; 216 bool is_overlapping = false; 217 218 for (i = 0; i < MEM_AVOID_MAX; i++) { 219 if (mem_overlaps(img, &mem_avoid[i]) && 220 mem_avoid[i].start < earliest) { 221 *overlap = mem_avoid[i]; 222 earliest = overlap->start; 223 is_overlapping = true; 224 } 225 } 226 227 /* Avoid all entries in the setup_data linked list. */ 228 ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; 229 while (ptr) { 230 struct mem_vector avoid; 231 232 avoid.start = (unsigned long)ptr; 233 avoid.size = sizeof(*ptr) + ptr->len; 234 235 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { 236 *overlap = avoid; 237 earliest = overlap->start; 238 is_overlapping = true; 239 } 240 241 ptr = (struct setup_data *)(unsigned long)ptr->next; 242 } 243 244 return is_overlapping; 245 } 246 247 struct slot_area { 248 unsigned long addr; 249 int num; 250 }; 251 252 #define MAX_SLOT_AREA 100 253 254 static struct slot_area slot_areas[MAX_SLOT_AREA]; 255 256 static unsigned long slot_max; 257 258 static unsigned long slot_area_index; 259 260 static void store_slot_info(struct mem_vector *region, unsigned long image_size) 261 { 262 struct slot_area slot_area; 263 264 if (slot_area_index == MAX_SLOT_AREA) 265 return; 266 267 slot_area.addr = region->start; 268 slot_area.num = (region->size - image_size) / 269 CONFIG_PHYSICAL_ALIGN + 1; 270 271 if (slot_area.num > 0) { 272 slot_areas[slot_area_index++] = slot_area; 273 slot_max += slot_area.num; 274 } 275 } 276 277 static unsigned long slots_fetch_random(void) 278 { 279 unsigned long slot; 280 int i; 281 282 /* Handle case of no slots stored. */ 283 if (slot_max == 0) 284 return 0; 285 286 slot = kaslr_get_random_long("Physical") % slot_max; 287 288 for (i = 0; i < slot_area_index; i++) { 289 if (slot >= slot_areas[i].num) { 290 slot -= slot_areas[i].num; 291 continue; 292 } 293 return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; 294 } 295 296 if (i == slot_area_index) 297 debug_putstr("slots_fetch_random() failed!?\n"); 298 return 0; 299 } 300 301 static void process_e820_entry(struct e820entry *entry, 302 unsigned long minimum, 303 unsigned long image_size) 304 { 305 struct mem_vector region, overlap; 306 struct slot_area slot_area; 307 unsigned long start_orig; 308 309 /* Skip non-RAM entries. */ 310 if (entry->type != E820_RAM) 311 return; 312 313 /* On 32-bit, ignore entries entirely above our maximum. */ 314 if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) 315 return; 316 317 /* Ignore entries entirely below our minimum. */ 318 if (entry->addr + entry->size < minimum) 319 return; 320 321 region.start = entry->addr; 322 region.size = entry->size; 323 324 /* Give up if slot area array is full. */ 325 while (slot_area_index < MAX_SLOT_AREA) { 326 start_orig = region.start; 327 328 /* Potentially raise address to minimum location. */ 329 if (region.start < minimum) 330 region.start = minimum; 331 332 /* Potentially raise address to meet alignment needs. */ 333 region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); 334 335 /* Did we raise the address above this e820 region? */ 336 if (region.start > entry->addr + entry->size) 337 return; 338 339 /* Reduce size by any delta from the original address. */ 340 region.size -= region.start - start_orig; 341 342 /* On 32-bit, reduce region size to fit within max size. */ 343 if (IS_ENABLED(CONFIG_X86_32) && 344 region.start + region.size > KERNEL_IMAGE_SIZE) 345 region.size = KERNEL_IMAGE_SIZE - region.start; 346 347 /* Return if region can't contain decompressed kernel */ 348 if (region.size < image_size) 349 return; 350 351 /* If nothing overlaps, store the region and return. */ 352 if (!mem_avoid_overlap(®ion, &overlap)) { 353 store_slot_info(®ion, image_size); 354 return; 355 } 356 357 /* Store beginning of region if holds at least image_size. */ 358 if (overlap.start > region.start + image_size) { 359 struct mem_vector beginning; 360 361 beginning.start = region.start; 362 beginning.size = overlap.start - region.start; 363 store_slot_info(&beginning, image_size); 364 } 365 366 /* Return if overlap extends to or past end of region. */ 367 if (overlap.start + overlap.size >= region.start + region.size) 368 return; 369 370 /* Clip off the overlapping region and start over. */ 371 region.size -= overlap.start - region.start + overlap.size; 372 region.start = overlap.start + overlap.size; 373 } 374 } 375 376 static unsigned long find_random_phys_addr(unsigned long minimum, 377 unsigned long image_size) 378 { 379 int i; 380 unsigned long addr; 381 382 /* Make sure minimum is aligned. */ 383 minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); 384 385 /* Verify potential e820 positions, appending to slots list. */ 386 for (i = 0; i < boot_params->e820_entries; i++) { 387 process_e820_entry(&boot_params->e820_map[i], minimum, 388 image_size); 389 if (slot_area_index == MAX_SLOT_AREA) { 390 debug_putstr("Aborted e820 scan (slot_areas full)!\n"); 391 break; 392 } 393 } 394 395 return slots_fetch_random(); 396 } 397 398 static unsigned long find_random_virt_addr(unsigned long minimum, 399 unsigned long image_size) 400 { 401 unsigned long slots, random_addr; 402 403 /* Make sure minimum is aligned. */ 404 minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); 405 /* Align image_size for easy slot calculations. */ 406 image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); 407 408 /* 409 * There are how many CONFIG_PHYSICAL_ALIGN-sized slots 410 * that can hold image_size within the range of minimum to 411 * KERNEL_IMAGE_SIZE? 412 */ 413 slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / 414 CONFIG_PHYSICAL_ALIGN + 1; 415 416 random_addr = kaslr_get_random_long("Virtual") % slots; 417 418 return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; 419 } 420 421 /* 422 * Since this function examines addresses much more numerically, 423 * it takes the input and output pointers as 'unsigned long'. 424 */ 425 void choose_random_location(unsigned long input, 426 unsigned long input_size, 427 unsigned long *output, 428 unsigned long output_size, 429 unsigned long *virt_addr) 430 { 431 unsigned long random_addr, min_addr; 432 433 /* By default, keep output position unchanged. */ 434 *virt_addr = *output; 435 436 if (cmdline_find_option_bool("nokaslr")) { 437 warn("KASLR disabled: 'nokaslr' on cmdline."); 438 return; 439 } 440 441 boot_params->hdr.loadflags |= KASLR_FLAG; 442 443 /* Prepare to add new identity pagetables on demand. */ 444 initialize_identity_maps(); 445 446 /* Record the various known unsafe memory ranges. */ 447 mem_avoid_init(input, input_size, *output); 448 449 /* 450 * Low end of the randomization range should be the 451 * smaller of 512M or the initial kernel image 452 * location: 453 */ 454 min_addr = min(*output, 512UL << 20); 455 456 /* Walk e820 and find a random address. */ 457 random_addr = find_random_phys_addr(min_addr, output_size); 458 if (!random_addr) { 459 warn("KASLR disabled: could not find suitable E820 region!"); 460 } else { 461 /* Update the new physical address location. */ 462 if (*output != random_addr) { 463 add_identity_map(random_addr, output_size); 464 *output = random_addr; 465 } 466 } 467 468 /* This actually loads the identity pagetable on x86_64. */ 469 finalize_identity_maps(); 470 471 /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ 472 if (IS_ENABLED(CONFIG_X86_64)) 473 random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); 474 *virt_addr = random_addr; 475 } 476