1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * kaslr.c 4 * 5 * This contains the routines needed to generate a reasonable level of 6 * entropy to choose a randomized kernel base address offset in support 7 * of Kernel Address Space Layout Randomization (KASLR). Additionally 8 * handles walking the physical memory maps (and tracking memory regions 9 * to avoid) in order to select a physical memory location that can 10 * contain the entire properly aligned running kernel image. 11 * 12 */ 13 14 /* 15 * isspace() in linux/ctype.h is expected by next_args() to filter 16 * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, 17 * since isdigit() is implemented in both of them. Hence disable it 18 * here. 19 */ 20 #define BOOT_CTYPE_H 21 22 #include "misc.h" 23 #include "error.h" 24 #include "../string.h" 25 26 #include <generated/compile.h> 27 #include <linux/module.h> 28 #include <linux/uts.h> 29 #include <linux/utsname.h> 30 #include <linux/ctype.h> 31 #include <linux/efi.h> 32 #include <generated/utsrelease.h> 33 #include <asm/efi.h> 34 35 /* Macros used by the included decompressor code below. */ 36 #define STATIC 37 #include <linux/decompress/mm.h> 38 39 #ifdef CONFIG_X86_5LEVEL 40 unsigned int __pgtable_l5_enabled; 41 unsigned int pgdir_shift __ro_after_init = 39; 42 unsigned int ptrs_per_p4d __ro_after_init = 1; 43 #endif 44 45 extern unsigned long get_cmd_line_ptr(void); 46 47 /* Used by PAGE_KERN* macros: */ 48 pteval_t __default_kernel_pte_mask __read_mostly = ~0; 49 50 /* Simplified build-specific string for starting entropy. */ 51 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" 52 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; 53 54 static unsigned long rotate_xor(unsigned long hash, const void *area, 55 size_t size) 56 { 57 size_t i; 58 unsigned long *ptr = (unsigned long *)area; 59 60 for (i = 0; i < size / sizeof(hash); i++) { 61 /* Rotate by odd number of bits and XOR. */ 62 hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); 63 hash ^= ptr[i]; 64 } 65 66 return hash; 67 } 68 69 /* Attempt to create a simple but unpredictable starting entropy. */ 70 static unsigned long get_boot_seed(void) 71 { 72 unsigned long hash = 0; 73 74 hash = rotate_xor(hash, build_str, sizeof(build_str)); 75 hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); 76 77 return hash; 78 } 79 80 #define KASLR_COMPRESSED_BOOT 81 #include "../../lib/kaslr.c" 82 83 84 /* Only supporting at most 4 unusable memmap regions with kaslr */ 85 #define MAX_MEMMAP_REGIONS 4 86 87 static bool memmap_too_large; 88 89 90 /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ 91 static unsigned long long mem_limit = ULLONG_MAX; 92 93 /* Number of immovable memory regions */ 94 static int num_immovable_mem; 95 96 enum mem_avoid_index { 97 MEM_AVOID_ZO_RANGE = 0, 98 MEM_AVOID_INITRD, 99 MEM_AVOID_CMDLINE, 100 MEM_AVOID_BOOTPARAMS, 101 MEM_AVOID_MEMMAP_BEGIN, 102 MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, 103 MEM_AVOID_MAX, 104 }; 105 106 static struct mem_vector mem_avoid[MEM_AVOID_MAX]; 107 108 static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) 109 { 110 /* Item one is entirely before item two. */ 111 if (one->start + one->size <= two->start) 112 return false; 113 /* Item one is entirely after item two. */ 114 if (one->start >= two->start + two->size) 115 return false; 116 return true; 117 } 118 119 char *skip_spaces(const char *str) 120 { 121 while (isspace(*str)) 122 ++str; 123 return (char *)str; 124 } 125 #include "../../../../lib/ctype.c" 126 #include "../../../../lib/cmdline.c" 127 128 enum parse_mode { 129 PARSE_MEMMAP, 130 PARSE_EFI, 131 }; 132 133 static int 134 parse_memmap(char *p, unsigned long long *start, unsigned long long *size, 135 enum parse_mode mode) 136 { 137 char *oldp; 138 139 if (!p) 140 return -EINVAL; 141 142 /* We don't care about this option here */ 143 if (!strncmp(p, "exactmap", 8)) 144 return -EINVAL; 145 146 oldp = p; 147 *size = memparse(p, &p); 148 if (p == oldp) 149 return -EINVAL; 150 151 switch (*p) { 152 case '#': 153 case '$': 154 case '!': 155 *start = memparse(p + 1, &p); 156 return 0; 157 case '@': 158 if (mode == PARSE_MEMMAP) { 159 /* 160 * memmap=nn@ss specifies usable region, should 161 * be skipped 162 */ 163 *size = 0; 164 } else { 165 unsigned long long flags; 166 167 /* 168 * efi_fake_mem=nn@ss:attr the attr specifies 169 * flags that might imply a soft-reservation. 170 */ 171 *start = memparse(p + 1, &p); 172 if (p && *p == ':') { 173 p++; 174 if (kstrtoull(p, 0, &flags) < 0) 175 *size = 0; 176 else if (flags & EFI_MEMORY_SP) 177 return 0; 178 } 179 *size = 0; 180 } 181 fallthrough; 182 default: 183 /* 184 * If w/o offset, only size specified, memmap=nn[KMG] has the 185 * same behaviour as mem=nn[KMG]. It limits the max address 186 * system can use. Region above the limit should be avoided. 187 */ 188 *start = 0; 189 return 0; 190 } 191 192 return -EINVAL; 193 } 194 195 static void mem_avoid_memmap(enum parse_mode mode, char *str) 196 { 197 static int i; 198 199 if (i >= MAX_MEMMAP_REGIONS) 200 return; 201 202 while (str && (i < MAX_MEMMAP_REGIONS)) { 203 int rc; 204 unsigned long long start, size; 205 char *k = strchr(str, ','); 206 207 if (k) 208 *k++ = 0; 209 210 rc = parse_memmap(str, &start, &size, mode); 211 if (rc < 0) 212 break; 213 str = k; 214 215 if (start == 0) { 216 /* Store the specified memory limit if size > 0 */ 217 if (size > 0) 218 mem_limit = size; 219 220 continue; 221 } 222 223 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; 224 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; 225 i++; 226 } 227 228 /* More than 4 memmaps, fail kaslr */ 229 if ((i >= MAX_MEMMAP_REGIONS) && str) 230 memmap_too_large = true; 231 } 232 233 /* Store the number of 1GB huge pages which users specified: */ 234 static unsigned long max_gb_huge_pages; 235 236 static void parse_gb_huge_pages(char *param, char *val) 237 { 238 static bool gbpage_sz; 239 char *p; 240 241 if (!strcmp(param, "hugepagesz")) { 242 p = val; 243 if (memparse(p, &p) != PUD_SIZE) { 244 gbpage_sz = false; 245 return; 246 } 247 248 if (gbpage_sz) 249 warn("Repeatedly set hugeTLB page size of 1G!\n"); 250 gbpage_sz = true; 251 return; 252 } 253 254 if (!strcmp(param, "hugepages") && gbpage_sz) { 255 p = val; 256 max_gb_huge_pages = simple_strtoull(p, &p, 0); 257 return; 258 } 259 } 260 261 static void handle_mem_options(void) 262 { 263 char *args = (char *)get_cmd_line_ptr(); 264 size_t len = strlen((char *)args); 265 char *tmp_cmdline; 266 char *param, *val; 267 u64 mem_size; 268 269 if (!strstr(args, "memmap=") && !strstr(args, "mem=") && 270 !strstr(args, "hugepages")) 271 return; 272 273 tmp_cmdline = malloc(len + 1); 274 if (!tmp_cmdline) 275 error("Failed to allocate space for tmp_cmdline"); 276 277 memcpy(tmp_cmdline, args, len); 278 tmp_cmdline[len] = 0; 279 args = tmp_cmdline; 280 281 /* Chew leading spaces */ 282 args = skip_spaces(args); 283 284 while (*args) { 285 args = next_arg(args, ¶m, &val); 286 /* Stop at -- */ 287 if (!val && strcmp(param, "--") == 0) { 288 warn("Only '--' specified in cmdline"); 289 goto out; 290 } 291 292 if (!strcmp(param, "memmap")) { 293 mem_avoid_memmap(PARSE_MEMMAP, val); 294 } else if (strstr(param, "hugepages")) { 295 parse_gb_huge_pages(param, val); 296 } else if (!strcmp(param, "mem")) { 297 char *p = val; 298 299 if (!strcmp(p, "nopentium")) 300 continue; 301 mem_size = memparse(p, &p); 302 if (mem_size == 0) 303 goto out; 304 305 mem_limit = mem_size; 306 } else if (!strcmp(param, "efi_fake_mem")) { 307 mem_avoid_memmap(PARSE_EFI, val); 308 } 309 } 310 311 out: 312 free(tmp_cmdline); 313 return; 314 } 315 316 /* 317 * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). 318 * The mem_avoid array is used to store the ranges that need to be avoided 319 * when KASLR searches for an appropriate random address. We must avoid any 320 * regions that are unsafe to overlap with during decompression, and other 321 * things like the initrd, cmdline and boot_params. This comment seeks to 322 * explain mem_avoid as clearly as possible since incorrect mem_avoid 323 * memory ranges lead to really hard to debug boot failures. 324 * 325 * The initrd, cmdline, and boot_params are trivial to identify for 326 * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and 327 * MEM_AVOID_BOOTPARAMS respectively below. 328 * 329 * What is not obvious how to avoid is the range of memory that is used 330 * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover 331 * the compressed kernel (ZO) and its run space, which is used to extract 332 * the uncompressed kernel (VO) and relocs. 333 * 334 * ZO's full run size sits against the end of the decompression buffer, so 335 * we can calculate where text, data, bss, etc of ZO are positioned more 336 * easily. 337 * 338 * For additional background, the decompression calculations can be found 339 * in header.S, and the memory diagram is based on the one found in misc.c. 340 * 341 * The following conditions are already enforced by the image layouts and 342 * associated code: 343 * - input + input_size >= output + output_size 344 * - kernel_total_size <= init_size 345 * - kernel_total_size <= output_size (see Note below) 346 * - output + init_size >= output + output_size 347 * 348 * (Note that kernel_total_size and output_size have no fundamental 349 * relationship, but output_size is passed to choose_random_location 350 * as a maximum of the two. The diagram is showing a case where 351 * kernel_total_size is larger than output_size, but this case is 352 * handled by bumping output_size.) 353 * 354 * The above conditions can be illustrated by a diagram: 355 * 356 * 0 output input input+input_size output+init_size 357 * | | | | | 358 * | | | | | 359 * |-----|--------|--------|--------------|-----------|--|-------------| 360 * | | | 361 * | | | 362 * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size 363 * 364 * [output, output+init_size) is the entire memory range used for 365 * extracting the compressed image. 366 * 367 * [output, output+kernel_total_size) is the range needed for the 368 * uncompressed kernel (VO) and its run size (bss, brk, etc). 369 * 370 * [output, output+output_size) is VO plus relocs (i.e. the entire 371 * uncompressed payload contained by ZO). This is the area of the buffer 372 * written to during decompression. 373 * 374 * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case 375 * range of the copied ZO and decompression code. (i.e. the range 376 * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) 377 * 378 * [input, input+input_size) is the original copied compressed image (ZO) 379 * (i.e. it does not include its run size). This range must be avoided 380 * because it contains the data used for decompression. 381 * 382 * [input+input_size, output+init_size) is [_text, _end) for ZO. This 383 * range includes ZO's heap and stack, and must be avoided since it 384 * performs the decompression. 385 * 386 * Since the above two ranges need to be avoided and they are adjacent, 387 * they can be merged, resulting in: [input, output+init_size) which 388 * becomes the MEM_AVOID_ZO_RANGE below. 389 */ 390 static void mem_avoid_init(unsigned long input, unsigned long input_size, 391 unsigned long output) 392 { 393 unsigned long init_size = boot_params->hdr.init_size; 394 u64 initrd_start, initrd_size; 395 u64 cmd_line, cmd_line_size; 396 char *ptr; 397 398 /* 399 * Avoid the region that is unsafe to overlap during 400 * decompression. 401 */ 402 mem_avoid[MEM_AVOID_ZO_RANGE].start = input; 403 mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; 404 add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, 405 mem_avoid[MEM_AVOID_ZO_RANGE].size); 406 407 /* Avoid initrd. */ 408 initrd_start = (u64)boot_params->ext_ramdisk_image << 32; 409 initrd_start |= boot_params->hdr.ramdisk_image; 410 initrd_size = (u64)boot_params->ext_ramdisk_size << 32; 411 initrd_size |= boot_params->hdr.ramdisk_size; 412 mem_avoid[MEM_AVOID_INITRD].start = initrd_start; 413 mem_avoid[MEM_AVOID_INITRD].size = initrd_size; 414 /* No need to set mapping for initrd, it will be handled in VO. */ 415 416 /* Avoid kernel command line. */ 417 cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; 418 cmd_line |= boot_params->hdr.cmd_line_ptr; 419 /* Calculate size of cmd_line. */ 420 ptr = (char *)(unsigned long)cmd_line; 421 for (cmd_line_size = 0; ptr[cmd_line_size++];) 422 ; 423 mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; 424 mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; 425 add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, 426 mem_avoid[MEM_AVOID_CMDLINE].size); 427 428 /* Avoid boot parameters. */ 429 mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; 430 mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); 431 add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, 432 mem_avoid[MEM_AVOID_BOOTPARAMS].size); 433 434 /* We don't need to set a mapping for setup_data. */ 435 436 /* Mark the memmap regions we need to avoid */ 437 handle_mem_options(); 438 439 /* Enumerate the immovable memory regions */ 440 num_immovable_mem = count_immovable_mem_regions(); 441 442 #ifdef CONFIG_X86_VERBOSE_BOOTUP 443 /* Make sure video RAM can be used. */ 444 add_identity_map(0, PMD_SIZE); 445 #endif 446 } 447 448 /* 449 * Does this memory vector overlap a known avoided area? If so, record the 450 * overlap region with the lowest address. 451 */ 452 static bool mem_avoid_overlap(struct mem_vector *img, 453 struct mem_vector *overlap) 454 { 455 int i; 456 struct setup_data *ptr; 457 unsigned long earliest = img->start + img->size; 458 bool is_overlapping = false; 459 460 for (i = 0; i < MEM_AVOID_MAX; i++) { 461 if (mem_overlaps(img, &mem_avoid[i]) && 462 mem_avoid[i].start < earliest) { 463 *overlap = mem_avoid[i]; 464 earliest = overlap->start; 465 is_overlapping = true; 466 } 467 } 468 469 /* Avoid all entries in the setup_data linked list. */ 470 ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; 471 while (ptr) { 472 struct mem_vector avoid; 473 474 avoid.start = (unsigned long)ptr; 475 avoid.size = sizeof(*ptr) + ptr->len; 476 477 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { 478 *overlap = avoid; 479 earliest = overlap->start; 480 is_overlapping = true; 481 } 482 483 if (ptr->type == SETUP_INDIRECT && 484 ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) { 485 avoid.start = ((struct setup_indirect *)ptr->data)->addr; 486 avoid.size = ((struct setup_indirect *)ptr->data)->len; 487 488 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { 489 *overlap = avoid; 490 earliest = overlap->start; 491 is_overlapping = true; 492 } 493 } 494 495 ptr = (struct setup_data *)(unsigned long)ptr->next; 496 } 497 498 return is_overlapping; 499 } 500 501 struct slot_area { 502 unsigned long addr; 503 int num; 504 }; 505 506 #define MAX_SLOT_AREA 100 507 508 static struct slot_area slot_areas[MAX_SLOT_AREA]; 509 510 static unsigned long slot_max; 511 512 static unsigned long slot_area_index; 513 514 static void store_slot_info(struct mem_vector *region, unsigned long image_size) 515 { 516 struct slot_area slot_area; 517 518 if (slot_area_index == MAX_SLOT_AREA) 519 return; 520 521 slot_area.addr = region->start; 522 slot_area.num = (region->size - image_size) / 523 CONFIG_PHYSICAL_ALIGN + 1; 524 525 if (slot_area.num > 0) { 526 slot_areas[slot_area_index++] = slot_area; 527 slot_max += slot_area.num; 528 } 529 } 530 531 /* 532 * Skip as many 1GB huge pages as possible in the passed region 533 * according to the number which users specified: 534 */ 535 static void 536 process_gb_huge_pages(struct mem_vector *region, unsigned long image_size) 537 { 538 unsigned long addr, size = 0; 539 struct mem_vector tmp; 540 int i = 0; 541 542 if (!max_gb_huge_pages) { 543 store_slot_info(region, image_size); 544 return; 545 } 546 547 addr = ALIGN(region->start, PUD_SIZE); 548 /* Did we raise the address above the passed in memory entry? */ 549 if (addr < region->start + region->size) 550 size = region->size - (addr - region->start); 551 552 /* Check how many 1GB huge pages can be filtered out: */ 553 while (size > PUD_SIZE && max_gb_huge_pages) { 554 size -= PUD_SIZE; 555 max_gb_huge_pages--; 556 i++; 557 } 558 559 /* No good 1GB huge pages found: */ 560 if (!i) { 561 store_slot_info(region, image_size); 562 return; 563 } 564 565 /* 566 * Skip those 'i'*1GB good huge pages, and continue checking and 567 * processing the remaining head or tail part of the passed region 568 * if available. 569 */ 570 571 if (addr >= region->start + image_size) { 572 tmp.start = region->start; 573 tmp.size = addr - region->start; 574 store_slot_info(&tmp, image_size); 575 } 576 577 size = region->size - (addr - region->start) - i * PUD_SIZE; 578 if (size >= image_size) { 579 tmp.start = addr + i * PUD_SIZE; 580 tmp.size = size; 581 store_slot_info(&tmp, image_size); 582 } 583 } 584 585 static unsigned long slots_fetch_random(void) 586 { 587 unsigned long slot; 588 int i; 589 590 /* Handle case of no slots stored. */ 591 if (slot_max == 0) 592 return 0; 593 594 slot = kaslr_get_random_long("Physical") % slot_max; 595 596 for (i = 0; i < slot_area_index; i++) { 597 if (slot >= slot_areas[i].num) { 598 slot -= slot_areas[i].num; 599 continue; 600 } 601 return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; 602 } 603 604 if (i == slot_area_index) 605 debug_putstr("slots_fetch_random() failed!?\n"); 606 return 0; 607 } 608 609 static void __process_mem_region(struct mem_vector *entry, 610 unsigned long minimum, 611 unsigned long image_size) 612 { 613 struct mem_vector region, overlap; 614 unsigned long start_orig, end; 615 struct mem_vector cur_entry; 616 617 /* On 32-bit, ignore entries entirely above our maximum. */ 618 if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE) 619 return; 620 621 /* Ignore entries entirely below our minimum. */ 622 if (entry->start + entry->size < minimum) 623 return; 624 625 /* Ignore entries above memory limit */ 626 end = min(entry->size + entry->start, mem_limit); 627 if (entry->start >= end) 628 return; 629 cur_entry.start = entry->start; 630 cur_entry.size = end - entry->start; 631 632 region.start = cur_entry.start; 633 region.size = cur_entry.size; 634 635 /* Give up if slot area array is full. */ 636 while (slot_area_index < MAX_SLOT_AREA) { 637 start_orig = region.start; 638 639 /* Potentially raise address to minimum location. */ 640 if (region.start < minimum) 641 region.start = minimum; 642 643 /* Potentially raise address to meet alignment needs. */ 644 region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); 645 646 /* Did we raise the address above the passed in memory entry? */ 647 if (region.start > cur_entry.start + cur_entry.size) 648 return; 649 650 /* Reduce size by any delta from the original address. */ 651 region.size -= region.start - start_orig; 652 653 /* On 32-bit, reduce region size to fit within max size. */ 654 if (IS_ENABLED(CONFIG_X86_32) && 655 region.start + region.size > KERNEL_IMAGE_SIZE) 656 region.size = KERNEL_IMAGE_SIZE - region.start; 657 658 /* Return if region can't contain decompressed kernel */ 659 if (region.size < image_size) 660 return; 661 662 /* If nothing overlaps, store the region and return. */ 663 if (!mem_avoid_overlap(®ion, &overlap)) { 664 process_gb_huge_pages(®ion, image_size); 665 return; 666 } 667 668 /* Store beginning of region if holds at least image_size. */ 669 if (overlap.start > region.start + image_size) { 670 struct mem_vector beginning; 671 672 beginning.start = region.start; 673 beginning.size = overlap.start - region.start; 674 process_gb_huge_pages(&beginning, image_size); 675 } 676 677 /* Return if overlap extends to or past end of region. */ 678 if (overlap.start + overlap.size >= region.start + region.size) 679 return; 680 681 /* Clip off the overlapping region and start over. */ 682 region.size -= overlap.start - region.start + overlap.size; 683 region.start = overlap.start + overlap.size; 684 } 685 } 686 687 static bool process_mem_region(struct mem_vector *region, 688 unsigned long long minimum, 689 unsigned long long image_size) 690 { 691 int i; 692 /* 693 * If no immovable memory found, or MEMORY_HOTREMOVE disabled, 694 * use @region directly. 695 */ 696 if (!num_immovable_mem) { 697 __process_mem_region(region, minimum, image_size); 698 699 if (slot_area_index == MAX_SLOT_AREA) { 700 debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n"); 701 return 1; 702 } 703 return 0; 704 } 705 706 #if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI) 707 /* 708 * If immovable memory found, filter the intersection between 709 * immovable memory and @region. 710 */ 711 for (i = 0; i < num_immovable_mem; i++) { 712 unsigned long long start, end, entry_end, region_end; 713 struct mem_vector entry; 714 715 if (!mem_overlaps(region, &immovable_mem[i])) 716 continue; 717 718 start = immovable_mem[i].start; 719 end = start + immovable_mem[i].size; 720 region_end = region->start + region->size; 721 722 entry.start = clamp(region->start, start, end); 723 entry_end = clamp(region_end, start, end); 724 entry.size = entry_end - entry.start; 725 726 __process_mem_region(&entry, minimum, image_size); 727 728 if (slot_area_index == MAX_SLOT_AREA) { 729 debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n"); 730 return 1; 731 } 732 } 733 #endif 734 return 0; 735 } 736 737 #ifdef CONFIG_EFI 738 /* 739 * Returns true if mirror region found (and must have been processed 740 * for slots adding) 741 */ 742 static bool 743 process_efi_entries(unsigned long minimum, unsigned long image_size) 744 { 745 struct efi_info *e = &boot_params->efi_info; 746 bool efi_mirror_found = false; 747 struct mem_vector region; 748 efi_memory_desc_t *md; 749 unsigned long pmap; 750 char *signature; 751 u32 nr_desc; 752 int i; 753 754 signature = (char *)&e->efi_loader_signature; 755 if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && 756 strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) 757 return false; 758 759 #ifdef CONFIG_X86_32 760 /* Can't handle data above 4GB at this time */ 761 if (e->efi_memmap_hi) { 762 warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n"); 763 return false; 764 } 765 pmap = e->efi_memmap; 766 #else 767 pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); 768 #endif 769 770 nr_desc = e->efi_memmap_size / e->efi_memdesc_size; 771 for (i = 0; i < nr_desc; i++) { 772 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); 773 if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { 774 efi_mirror_found = true; 775 break; 776 } 777 } 778 779 for (i = 0; i < nr_desc; i++) { 780 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); 781 782 /* 783 * Here we are more conservative in picking free memory than 784 * the EFI spec allows: 785 * 786 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also 787 * free memory and thus available to place the kernel image into, 788 * but in practice there's firmware where using that memory leads 789 * to crashes. 790 * 791 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free. 792 */ 793 if (md->type != EFI_CONVENTIONAL_MEMORY) 794 continue; 795 796 if (efi_soft_reserve_enabled() && 797 (md->attribute & EFI_MEMORY_SP)) 798 continue; 799 800 if (efi_mirror_found && 801 !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) 802 continue; 803 804 region.start = md->phys_addr; 805 region.size = md->num_pages << EFI_PAGE_SHIFT; 806 if (process_mem_region(®ion, minimum, image_size)) 807 break; 808 } 809 return true; 810 } 811 #else 812 static inline bool 813 process_efi_entries(unsigned long minimum, unsigned long image_size) 814 { 815 return false; 816 } 817 #endif 818 819 static void process_e820_entries(unsigned long minimum, 820 unsigned long image_size) 821 { 822 int i; 823 struct mem_vector region; 824 struct boot_e820_entry *entry; 825 826 /* Verify potential e820 positions, appending to slots list. */ 827 for (i = 0; i < boot_params->e820_entries; i++) { 828 entry = &boot_params->e820_table[i]; 829 /* Skip non-RAM entries. */ 830 if (entry->type != E820_TYPE_RAM) 831 continue; 832 region.start = entry->addr; 833 region.size = entry->size; 834 if (process_mem_region(®ion, minimum, image_size)) 835 break; 836 } 837 } 838 839 static unsigned long find_random_phys_addr(unsigned long minimum, 840 unsigned long image_size) 841 { 842 /* Check if we had too many memmaps. */ 843 if (memmap_too_large) { 844 debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n"); 845 return 0; 846 } 847 848 /* Make sure minimum is aligned. */ 849 minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); 850 851 if (process_efi_entries(minimum, image_size)) 852 return slots_fetch_random(); 853 854 process_e820_entries(minimum, image_size); 855 return slots_fetch_random(); 856 } 857 858 static unsigned long find_random_virt_addr(unsigned long minimum, 859 unsigned long image_size) 860 { 861 unsigned long slots, random_addr; 862 863 /* Make sure minimum is aligned. */ 864 minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); 865 /* Align image_size for easy slot calculations. */ 866 image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); 867 868 /* 869 * There are how many CONFIG_PHYSICAL_ALIGN-sized slots 870 * that can hold image_size within the range of minimum to 871 * KERNEL_IMAGE_SIZE? 872 */ 873 slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / 874 CONFIG_PHYSICAL_ALIGN + 1; 875 876 random_addr = kaslr_get_random_long("Virtual") % slots; 877 878 return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; 879 } 880 881 /* 882 * Since this function examines addresses much more numerically, 883 * it takes the input and output pointers as 'unsigned long'. 884 */ 885 void choose_random_location(unsigned long input, 886 unsigned long input_size, 887 unsigned long *output, 888 unsigned long output_size, 889 unsigned long *virt_addr) 890 { 891 unsigned long random_addr, min_addr; 892 893 if (cmdline_find_option_bool("nokaslr")) { 894 warn("KASLR disabled: 'nokaslr' on cmdline."); 895 return; 896 } 897 898 #ifdef CONFIG_X86_5LEVEL 899 if (__read_cr4() & X86_CR4_LA57) { 900 __pgtable_l5_enabled = 1; 901 pgdir_shift = 48; 902 ptrs_per_p4d = 512; 903 } 904 #endif 905 906 boot_params->hdr.loadflags |= KASLR_FLAG; 907 908 /* Prepare to add new identity pagetables on demand. */ 909 initialize_identity_maps(); 910 911 /* Record the various known unsafe memory ranges. */ 912 mem_avoid_init(input, input_size, *output); 913 914 /* 915 * Low end of the randomization range should be the 916 * smaller of 512M or the initial kernel image 917 * location: 918 */ 919 min_addr = min(*output, 512UL << 20); 920 921 /* Walk available memory entries to find a random address. */ 922 random_addr = find_random_phys_addr(min_addr, output_size); 923 if (!random_addr) { 924 warn("Physical KASLR disabled: no suitable memory region!"); 925 } else { 926 /* Update the new physical address location. */ 927 if (*output != random_addr) { 928 add_identity_map(random_addr, output_size); 929 *output = random_addr; 930 } 931 932 /* 933 * This loads the identity mapping page table. 934 * This should only be done if a new physical address 935 * is found for the kernel, otherwise we should keep 936 * the old page table to make it be like the "nokaslr" 937 * case. 938 */ 939 finalize_identity_maps(); 940 } 941 942 943 /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ 944 if (IS_ENABLED(CONFIG_X86_64)) 945 random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); 946 *virt_addr = random_addr; 947 } 948