xref: /openbmc/linux/arch/x86/boot/compressed/kaslr.c (revision b24413180f5600bcb3bb70fbed5cf186b60864bd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * kaslr.c
4  *
5  * This contains the routines needed to generate a reasonable level of
6  * entropy to choose a randomized kernel base address offset in support
7  * of Kernel Address Space Layout Randomization (KASLR). Additionally
8  * handles walking the physical memory maps (and tracking memory regions
9  * to avoid) in order to select a physical memory location that can
10  * contain the entire properly aligned running kernel image.
11  *
12  */
13 
14 /*
15  * next_arg() expects isspace() from linux/ctype.h to filter out
16  * "space/lf/tab". However, boot/ctype.h conflicts with linux/ctype.h,
17  * since isdigit() is implemented in both of them. Hence disable
18  * boot/ctype.h here.
19  */
20 #define BOOT_CTYPE_H
21 
22 /*
23  * _ctype[] in lib/ctype.c is needed by isspace() in linux/ctype.h.
24  * However, both lib/ctype.c and lib/cmdline.c pull in EXPORT_SYMBOL,
25  * which is meaningless here and can cause build errors in some cases.
26  * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
27  * as empty.
28  */
29 #define _LINUX_EXPORT_H
30 #define EXPORT_SYMBOL(sym)
31 
32 #include "misc.h"
33 #include "error.h"
34 #include "../string.h"
35 
36 #include <generated/compile.h>
37 #include <linux/module.h>
38 #include <linux/uts.h>
39 #include <linux/utsname.h>
40 #include <linux/ctype.h>
41 #include <linux/efi.h>
42 #include <generated/utsrelease.h>
43 #include <asm/efi.h>
44 
45 /* Macros used by the included decompressor code below. */
46 #define STATIC
47 #include <linux/decompress/mm.h>
48 
49 extern unsigned long get_cmd_line_ptr(void);
50 
51 /* Simplified build-specific string for starting entropy. */
52 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
53 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
54 
55 static unsigned long rotate_xor(unsigned long hash, const void *area,
56 				size_t size)
57 {
58 	size_t i;
59 	unsigned long *ptr = (unsigned long *)area;
60 
61 	for (i = 0; i < size / sizeof(hash); i++) {
62 		/* Rotate by odd number of bits and XOR. */
63 		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
64 		hash ^= ptr[i];
65 	}
66 
67 	return hash;
68 }
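/*
 * Note: the expression above is a plain right-rotate of 'hash' by 7 bits.
 * On x86_64, where sizeof(unsigned long) * 8 == 64, it is equivalent to
 * (hash << 57) | (hash >> 7); on x86_32 the left-shift amount is 25.
 */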
69 
70 /* Attempt to create a simple but unpredictable starting entropy. */
71 static unsigned long get_boot_seed(void)
72 {
73 	unsigned long hash = 0;
74 
75 	hash = rotate_xor(hash, build_str, sizeof(build_str));
76 	hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
77 
78 	return hash;
79 }
80 
81 #define KASLR_COMPRESSED_BOOT
82 #include "../../lib/kaslr.c"
83 
84 struct mem_vector {
85 	unsigned long long start;
86 	unsigned long long size;
87 };
88 
89 /* At most 4 unusable memmap= regions are supported with KASLR */
90 #define MAX_MEMMAP_REGIONS	4
91 
92 static bool memmap_too_large;
93 
94 
95 /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
96 unsigned long long mem_limit = ULLONG_MAX;
97 
98 
99 enum mem_avoid_index {
100 	MEM_AVOID_ZO_RANGE = 0,
101 	MEM_AVOID_INITRD,
102 	MEM_AVOID_CMDLINE,
103 	MEM_AVOID_BOOTPARAMS,
104 	MEM_AVOID_MEMMAP_BEGIN,
105 	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
106 	MEM_AVOID_MAX,
107 };
108 
109 static struct mem_vector mem_avoid[MEM_AVOID_MAX];
110 
111 static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
112 {
113 	/* Item one is entirely before item two. */
114 	if (one->start + one->size <= two->start)
115 		return false;
116 	/* Item one is entirely after item two. */
117 	if (one->start >= two->start + two->size)
118 		return false;
119 	return true;
120 }
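/*
 * Example: the half-open ranges [1M, 2M) and [2M, 3M), expressed as
 * { .start = 0x100000, .size = 0x100000 } and { .start = 0x200000,
 * .size = 0x100000 }, do not overlap: one->start + one->size == two->start,
 * so the first check above reports "entirely before" and returns false.
 */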
121 
122 char *skip_spaces(const char *str)
123 {
124 	while (isspace(*str))
125 		++str;
126 	return (char *)str;
127 }
128 #include "../../../../lib/ctype.c"
129 #include "../../../../lib/cmdline.c"
130 
131 static int
132 parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
133 {
134 	char *oldp;
135 
136 	if (!p)
137 		return -EINVAL;
138 
139 	/* We don't care about this option here */
140 	if (!strncmp(p, "exactmap", 8))
141 		return -EINVAL;
142 
143 	oldp = p;
144 	*size = memparse(p, &p);
145 	if (p == oldp)
146 		return -EINVAL;
147 
148 	switch (*p) {
149 	case '#':
150 	case '$':
151 	case '!':
152 		*start = memparse(p + 1, &p);
153 		return 0;
154 	case '@':
155 		/* memmap=nn@ss specifies a usable region; skip it here */
156 		*size = 0;
157 		/* Fall through */
158 	default:
159 		/*
160 		 * Without an offset, only a size is given: memmap=nn[KMG]
161 		 * behaves the same as mem=nn[KMG]. It limits the maximum
162 		 * address the system can use; regions above the limit are avoided.
163 		 */
164 		*start = 0;
165 		return 0;
166 	}
167 
168 	return -EINVAL;
169 }
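/*
 * Examples of what parse_memmap() produces for typical memmap= values
 * (sizes accepted in the memparse() forms):
 *
 *   "512M$0x20000000" -> *size = 512M, *start = 0x20000000 (region to avoid)
 *   "64M@0x10000000"  -> *size = 0 (usable region, ignored by the caller)
 *   "1G"              -> *start = 0, *size = 1G (treated like mem=1G)
 */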
170 
171 static void mem_avoid_memmap(char *str)
172 {
173 	static int i;
174 
175 
176 	if (i >= MAX_MEMMAP_REGIONS)
177 		return;
178 
179 	while (str && (i < MAX_MEMMAP_REGIONS)) {
180 		int rc;
181 		unsigned long long start, size;
182 		char *k = strchr(str, ',');
183 
184 		if (k)
185 			*k++ = 0;
186 
187 		rc = parse_memmap(str, &start, &size);
188 		if (rc < 0)
189 			break;
190 		str = k;
191 
192 		if (start == 0) {
193 			/* Store the specified memory limit if size > 0 */
194 			if (size > 0)
195 				mem_limit = size;
196 
197 			continue;
198 		}
199 
200 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
201 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
202 		i++;
203 	}
204 
205 	/* More than MAX_MEMMAP_REGIONS memmap= regions specified; disable KASLR */
206 	if ((i >= MAX_MEMMAP_REGIONS) && str)
207 		memmap_too_large = true;
208 }
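/*
 * For example, booting with "memmap=64M$0x10000000,512M$0x80000000" fills
 * mem_avoid[MEM_AVOID_MEMMAP_BEGIN] and mem_avoid[MEM_AVOID_MEMMAP_BEGIN + 1]
 * with those two regions; a fifth comma-separated region in the same option
 * would set memmap_too_large and disable physical KASLR.
 */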
209 
210 static int handle_mem_memmap(void)
211 {
212 	char *args = (char *)get_cmd_line_ptr();
213 	size_t len = strlen(args);
214 	char *tmp_cmdline;
215 	char *param, *val;
216 	u64 mem_size;
217 
218 	if (!strstr(args, "memmap=") && !strstr(args, "mem="))
219 		return 0;
220 
221 	tmp_cmdline = malloc(len + 1);
222 	if (!tmp_cmdline)
223 		error("Failed to allocate space for tmp_cmdline");
224 
225 	memcpy(tmp_cmdline, args, len);
226 	tmp_cmdline[len] = 0;
227 	args = tmp_cmdline;
228 
229 	/* Chew leading spaces */
230 	args = skip_spaces(args);
231 
232 	while (*args) {
233 		args = next_arg(args, &param, &val);
234 		/* Stop at -- */
235 		if (!val && strcmp(param, "--") == 0) {
236 			warn("Only '--' specified in cmdline");
237 			free(tmp_cmdline);
238 			return -1;
239 		}
240 
241 		if (!strcmp(param, "memmap")) {
242 			mem_avoid_memmap(val);
243 		} else if (!strcmp(param, "mem")) {
244 			char *p = val;
245 
246 			if (!strcmp(p, "nopentium"))
247 				continue;
248 			mem_size = memparse(p, &p);
249 			if (mem_size == 0) {
250 				free(tmp_cmdline);
251 				return -EINVAL;
252 			}
253 			mem_limit = mem_size;
254 		}
255 	}
256 
257 	free(tmp_cmdline);
258 	return 0;
259 }
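/*
 * handle_mem_memmap() parses a writable copy of the command line because
 * next_arg() terminates each parameter in place (it writes NUL bytes into
 * the buffer while splitting "param=val" pairs).
 */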
260 
261 /*
262  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
263  * The mem_avoid array is used to store the ranges that need to be avoided
264  * when KASLR searches for an appropriate random address. We must avoid any
265  * regions that are unsafe to overlap with during decompression, and other
266  * things like the initrd, cmdline and boot_params. This comment seeks to
267  * explain mem_avoid as clearly as possible, since incorrect mem_avoid
268  * ranges lead to boot failures that are very hard to debug.
269  *
270  * The initrd, cmdline, and boot_params are trivial to identify for
271  * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
272  * MEM_AVOID_BOOTPARAMS respectively below.
273  *
274  * What is less obvious is how to avoid the range of memory that is used
275  * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
276  * the compressed kernel (ZO) and its run space, which is used to extract
277  * the uncompressed kernel (VO) and relocs.
278  *
279  * ZO's full run size sits against the end of the decompression buffer, so
280  * we can calculate where text, data, bss, etc of ZO are positioned more
281  * easily.
282  *
283  * For additional background, the decompression calculations can be found
284  * in header.S, and the memory diagram is based on the one found in misc.c.
285  *
286  * The following conditions are already enforced by the image layouts and
287  * associated code:
288  *  - input + input_size >= output + output_size
289  *  - kernel_total_size <= init_size
290  *  - kernel_total_size <= output_size (see Note below)
291  *  - output + init_size >= output + output_size
292  *
293  * (Note that kernel_total_size and output_size have no fundamental
294  * relationship, but output_size is passed to choose_random_location
295  * as a maximum of the two. The diagram shows a case where
296  * kernel_total_size is larger than output_size, but this case is
297  * handled by bumping output_size.)
298  *
299  * The above conditions can be illustrated by a diagram:
300  *
301  * 0   output            input            input+input_size    output+init_size
302  * |     |                 |                             |             |
303  * |     |                 |                             |             |
304  * |-----|--------|--------|--------------|-----------|--|-------------|
305  *                |                       |           |
306  *                |                       |           |
307  * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
308  *
309  * [output, output+init_size) is the entire memory range used for
310  * extracting the compressed image.
311  *
312  * [output, output+kernel_total_size) is the range needed for the
313  * uncompressed kernel (VO) and its run size (bss, brk, etc).
314  *
315  * [output, output+output_size) is VO plus relocs (i.e. the entire
316  * uncompressed payload contained by ZO). This is the area of the buffer
317  * written to during decompression.
318  *
319  * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
320  * range of the copied ZO and decompression code (i.e. the range of
321  * size ZO_INIT_SIZE, counted backwards from output+init_size).
322  *
323  * [input, input+input_size) is the original copied compressed image (ZO)
324  * (i.e. it does not include its run size). This range must be avoided
325  * because it contains the data used for decompression.
326  *
327  * [input+input_size, output+init_size) is [_text, _end) for ZO. This
328  * range includes ZO's heap and stack, and must be avoided since the
329  * code running from it performs the decompression.
330  *
331  * Since the above two ranges need to be avoided and they are adjacent,
332  * they can be merged, resulting in: [input, output+init_size) which
333  * becomes the MEM_AVOID_ZO_RANGE below.
334  */
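/*
 * A purely illustrative instance of the merge described above (hypothetical
 * numbers, not from a real boot): with output = 0x1000000,
 * init_size = 0x2000000, and the compressed image copied to
 * input = 0x2800000 with input_size = 0x700000, MEM_AVOID_ZO_RANGE becomes
 * [input, output + init_size) = [0x2800000, 0x3000000).
 */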
335 static void mem_avoid_init(unsigned long input, unsigned long input_size,
336 			   unsigned long output)
337 {
338 	unsigned long init_size = boot_params->hdr.init_size;
339 	u64 initrd_start, initrd_size;
340 	u64 cmd_line, cmd_line_size;
341 	char *ptr;
342 
343 	/*
344 	 * Avoid the region that is unsafe to overlap during
345 	 * decompression.
346 	 */
347 	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
348 	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
349 	add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
350 			 mem_avoid[MEM_AVOID_ZO_RANGE].size);
351 
352 	/* Avoid initrd. */
353 	initrd_start  = (u64)boot_params->ext_ramdisk_image << 32;
354 	initrd_start |= boot_params->hdr.ramdisk_image;
355 	initrd_size  = (u64)boot_params->ext_ramdisk_size << 32;
356 	initrd_size |= boot_params->hdr.ramdisk_size;
357 	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
358 	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
359 	/* No need to set mapping for initrd, it will be handled in VO. */
360 
361 	/* Avoid kernel command line. */
362 	cmd_line  = (u64)boot_params->ext_cmd_line_ptr << 32;
363 	cmd_line |= boot_params->hdr.cmd_line_ptr;
364 	/* Calculate size of cmd_line. */
365 	ptr = (char *)(unsigned long)cmd_line;
366 	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
367 		;
368 	mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
369 	mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
370 	add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
371 			 mem_avoid[MEM_AVOID_CMDLINE].size);
372 
373 	/* Avoid boot parameters. */
374 	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
375 	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
376 	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
377 			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);
378 
379 	/* We don't need to set a mapping for setup_data. */
380 
381 	/* Mark the memmap regions we need to avoid */
382 	handle_mem_memmap();
383 
384 #ifdef CONFIG_X86_VERBOSE_BOOTUP
385 	/* Make sure video RAM can be used. */
386 	add_identity_map(0, PMD_SIZE);
387 #endif
388 }
389 
390 /*
391  * Does this memory vector overlap a known avoided area? If so, record the
392  * overlap region with the lowest address.
393  */
394 static bool mem_avoid_overlap(struct mem_vector *img,
395 			      struct mem_vector *overlap)
396 {
397 	int i;
398 	struct setup_data *ptr;
399 	unsigned long earliest = img->start + img->size;
400 	bool is_overlapping = false;
401 
402 	for (i = 0; i < MEM_AVOID_MAX; i++) {
403 		if (mem_overlaps(img, &mem_avoid[i]) &&
404 		    mem_avoid[i].start < earliest) {
405 			*overlap = mem_avoid[i];
406 			earliest = overlap->start;
407 			is_overlapping = true;
408 		}
409 	}
410 
411 	/* Avoid all entries in the setup_data linked list. */
412 	ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
413 	while (ptr) {
414 		struct mem_vector avoid;
415 
416 		avoid.start = (unsigned long)ptr;
417 		avoid.size = sizeof(*ptr) + ptr->len;
418 
419 		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
420 			*overlap = avoid;
421 			earliest = overlap->start;
422 			is_overlapping = true;
423 		}
424 
425 		ptr = (struct setup_data *)(unsigned long)ptr->next;
426 	}
427 
428 	return is_overlapping;
429 }
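/*
 * Reporting the overlap with the lowest starting address matters to the
 * caller: process_mem_region() keeps the part of the region below the
 * earliest overlap as a slot candidate and then continues scanning from
 * the end of that overlap.
 */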
430 
431 struct slot_area {
432 	unsigned long addr;
433 	int num;
434 };
435 
436 #define MAX_SLOT_AREA 100
437 
438 static struct slot_area slot_areas[MAX_SLOT_AREA];
439 
440 static unsigned long slot_max;
441 
442 static unsigned long slot_area_index;
443 
444 static void store_slot_info(struct mem_vector *region, unsigned long image_size)
445 {
446 	struct slot_area slot_area;
447 
448 	if (slot_area_index == MAX_SLOT_AREA)
449 		return;
450 
451 	slot_area.addr = region->start;
452 	slot_area.num = (region->size - image_size) /
453 			CONFIG_PHYSICAL_ALIGN + 1;
454 
455 	if (slot_area.num > 0) {
456 		slot_areas[slot_area_index++] = slot_area;
457 		slot_max += slot_area.num;
458 	}
459 }
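/*
 * Example, assuming the x86_64 default CONFIG_PHYSICAL_ALIGN of 0x200000
 * (2 MiB): a 16 MiB region and an 8 MiB image yield
 * (16M - 8M) / 2M + 1 = 5 candidate slots, one every 2 MiB starting at
 * region->start.
 */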
460 
461 static unsigned long slots_fetch_random(void)
462 {
463 	unsigned long slot;
464 	int i;
465 
466 	/* Handle case of no slots stored. */
467 	if (slot_max == 0)
468 		return 0;
469 
470 	slot = kaslr_get_random_long("Physical") % slot_max;
471 
472 	for (i = 0; i < slot_area_index; i++) {
473 		if (slot >= slot_areas[i].num) {
474 			slot -= slot_areas[i].num;
475 			continue;
476 		}
477 		return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
478 	}
479 
480 	if (i == slot_area_index)
481 		debug_putstr("slots_fetch_random() failed!?\n");
482 	return 0;
483 }
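/*
 * Example of the walk above: with slot_areas[0].num = 3 and
 * slot_areas[1].num = 5 (slot_max = 8), a random slot of 6 is first reduced
 * by 3 and then resolved to slot_areas[1].addr + 3 * CONFIG_PHYSICAL_ALIGN.
 */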
484 
485 static void process_mem_region(struct mem_vector *entry,
486 			       unsigned long minimum,
487 			       unsigned long image_size)
488 {
489 	struct mem_vector region, overlap;
490 	struct slot_area slot_area;
491 	unsigned long start_orig, end;
492 	struct mem_vector cur_entry;
493 
494 	/* On 32-bit, ignore entries entirely above our maximum. */
495 	if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
496 		return;
497 
498 	/* Ignore entries entirely below our minimum. */
499 	if (entry->start + entry->size < minimum)
500 		return;
501 
502 	/* Clamp the entry to the memory limit; ignore it if entirely above. */
503 	end = min(entry->size + entry->start, mem_limit);
504 	if (entry->start >= end)
505 		return;
506 	cur_entry.start = entry->start;
507 	cur_entry.size = end - entry->start;
508 
509 	region.start = cur_entry.start;
510 	region.size = cur_entry.size;
511 
512 	/* Give up if slot area array is full. */
513 	while (slot_area_index < MAX_SLOT_AREA) {
514 		start_orig = region.start;
515 
516 		/* Potentially raise address to minimum location. */
517 		if (region.start < minimum)
518 			region.start = minimum;
519 
520 		/* Potentially raise address to meet alignment needs. */
521 		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
522 
523 		/* Did we raise the address beyond the end of the passed-in memory entry? */
524 		if (region.start > cur_entry.start + cur_entry.size)
525 			return;
526 
527 		/* Reduce size by any delta from the original address. */
528 		region.size -= region.start - start_orig;
529 
530 		/* On 32-bit, reduce region size to fit within max size. */
531 		if (IS_ENABLED(CONFIG_X86_32) &&
532 		    region.start + region.size > KERNEL_IMAGE_SIZE)
533 			region.size = KERNEL_IMAGE_SIZE - region.start;
534 
535 		/* Return if region can't contain decompressed kernel */
536 		if (region.size < image_size)
537 			return;
538 
539 		/* If nothing overlaps, store the region and return. */
540 		if (!mem_avoid_overlap(&region, &overlap)) {
541 			store_slot_info(&region, image_size);
542 			return;
543 		}
544 
545 		/* Store the beginning of the region if it holds at least image_size. */
546 		if (overlap.start > region.start + image_size) {
547 			struct mem_vector beginning;
548 
549 			beginning.start = region.start;
550 			beginning.size = overlap.start - region.start;
551 			store_slot_info(&beginning, image_size);
552 		}
553 
554 		/* Return if overlap extends to or past end of region. */
555 		if (overlap.start + overlap.size >= region.start + region.size)
556 			return;
557 
558 		/* Clip off the overlapping region and start over. */
559 		region.size -= overlap.start - region.start + overlap.size;
560 		region.start = overlap.start + overlap.size;
561 	}
562 }
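/*
 * Example of the splitting above: for a RAM region [16M, 80M) with a single
 * avoided range [32M, 48M) and an 8 MiB image, the first pass stores
 * [16M, 32M) as a slot area, then the region is clipped to [48M, 80M) and
 * the loop repeats until no overlaps remain.
 */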
563 
564 #ifdef CONFIG_EFI
565 /*
566  * Returns true if the EFI memory map was usable and has been processed
567  * for slot additions (only mirror regions are used when any are present).
568  */
569 static bool
570 process_efi_entries(unsigned long minimum, unsigned long image_size)
571 {
572 	struct efi_info *e = &boot_params->efi_info;
573 	bool efi_mirror_found = false;
574 	struct mem_vector region;
575 	efi_memory_desc_t *md;
576 	unsigned long pmap;
577 	char *signature;
578 	u32 nr_desc;
579 	int i;
580 
581 	signature = (char *)&e->efi_loader_signature;
582 	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
583 	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
584 		return false;
585 
586 #ifdef CONFIG_X86_32
587 	/* Can't handle data above 4GB at this time */
588 	if (e->efi_memmap_hi) {
589 		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
590 		return false;
591 	}
592 	pmap =  e->efi_memmap;
593 #else
594 	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
595 #endif
596 
597 	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
598 	for (i = 0; i < nr_desc; i++) {
599 		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
600 		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
601 			efi_mirror_found = true;
602 			break;
603 		}
604 	}
605 
606 	for (i = 0; i < nr_desc; i++) {
607 		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
608 
609 		/*
610 		 * Here we are more conservative in picking free memory than
611 		 * the EFI spec allows:
612 		 *
613 		 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
614 		 * free memory and thus available to place the kernel image into,
615 		 * but in practice there's firmware where using that memory leads
616 		 * to crashes.
617 		 *
618 		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
619 		 */
620 		if (md->type != EFI_CONVENTIONAL_MEMORY)
621 			continue;
622 
623 		if (efi_mirror_found &&
624 		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
625 			continue;
626 
627 		region.start = md->phys_addr;
628 		region.size = md->num_pages << EFI_PAGE_SHIFT;
629 		process_mem_region(&region, minimum, image_size);
630 		if (slot_area_index == MAX_SLOT_AREA) {
631 			debug_putstr("Aborted EFI scan (slot_areas full)!\n");
632 			break;
633 		}
634 	}
635 	return true;
636 }
637 #else
638 static inline bool
639 process_efi_entries(unsigned long minimum, unsigned long image_size)
640 {
641 	return false;
642 }
643 #endif
644 
645 static void process_e820_entries(unsigned long minimum,
646 				 unsigned long image_size)
647 {
648 	int i;
649 	struct mem_vector region;
650 	struct boot_e820_entry *entry;
651 
652 	/* Verify potential e820 positions, appending to slots list. */
653 	for (i = 0; i < boot_params->e820_entries; i++) {
654 		entry = &boot_params->e820_table[i];
655 		/* Skip non-RAM entries. */
656 		if (entry->type != E820_TYPE_RAM)
657 			continue;
658 		region.start = entry->addr;
659 		region.size = entry->size;
660 		process_mem_region(&region, minimum, image_size);
661 		if (slot_area_index == MAX_SLOT_AREA) {
662 			debug_putstr("Aborted e820 scan (slot_areas full)!\n");
663 			break;
664 		}
665 	}
666 }
667 
668 static unsigned long find_random_phys_addr(unsigned long minimum,
669 					   unsigned long image_size)
670 {
671 	/* Check if we had too many memmaps. */
672 	if (memmap_too_large) {
673 		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
674 		return 0;
675 	}
676 
677 	/* Make sure minimum is aligned. */
678 	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
679 
680 	if (process_efi_entries(minimum, image_size))
681 		return slots_fetch_random();
682 
683 	process_e820_entries(minimum, image_size);
684 	return slots_fetch_random();
685 }
686 
687 static unsigned long find_random_virt_addr(unsigned long minimum,
688 					   unsigned long image_size)
689 {
690 	unsigned long slots, random_addr;
691 
692 	/* Make sure minimum is aligned. */
693 	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
694 	/* Align image_size for easy slot calculations. */
695 	image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
696 
697 	/*
698 	 * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold image_size
699 	 * within the range of addresses from minimum to
700 	 * KERNEL_IMAGE_SIZE?
701 	 */
702 	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
703 		 CONFIG_PHYSICAL_ALIGN + 1;
704 
705 	random_addr = kaslr_get_random_long("Virtual") % slots;
706 
707 	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
708 }
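/*
 * Example, assuming the x86_64 defaults of LOAD_PHYSICAL_ADDR = 16M,
 * CONFIG_PHYSICAL_ALIGN = 2M and KERNEL_IMAGE_SIZE = 1G (the KASLR value):
 * a 32 MiB image gives (1024M - 16M - 32M) / 2M + 1 = 489 virtual slots.
 */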
709 
710 /*
711  * Since this function works with addresses numerically rather than as
712  * pointers, it takes the input and output addresses as 'unsigned long'.
713  */
714 void choose_random_location(unsigned long input,
715 			    unsigned long input_size,
716 			    unsigned long *output,
717 			    unsigned long output_size,
718 			    unsigned long *virt_addr)
719 {
720 	unsigned long random_addr, min_addr;
721 
722 	if (cmdline_find_option_bool("nokaslr")) {
723 		warn("KASLR disabled: 'nokaslr' on cmdline.");
724 		return;
725 	}
726 
727 	boot_params->hdr.loadflags |= KASLR_FLAG;
728 
729 	/* Prepare to add new identity pagetables on demand. */
730 	initialize_identity_maps();
731 
732 	/* Record the various known unsafe memory ranges. */
733 	mem_avoid_init(input, input_size, *output);
734 
735 	/*
736 	 * Low end of the randomization range should be the
737 	 * smaller of 512M or the initial kernel image
738 	 * location:
739 	 */
740 	min_addr = min(*output, 512UL << 20);
741 
742 	/* Walk available memory entries to find a random address. */
743 	random_addr = find_random_phys_addr(min_addr, output_size);
744 	if (!random_addr) {
745 		warn("Physical KASLR disabled: no suitable memory region!");
746 	} else {
747 		/* Update the new physical address location. */
748 		if (*output != random_addr) {
749 			add_identity_map(random_addr, output_size);
750 			*output = random_addr;
751 		}
752 
753 		/*
754 		 * This loads the identity mapping page table.
755 		 * This should only be done if a new physical address
756 		 * is found for the kernel, otherwise we should keep
757 		 * the old page table to make it be like the "nokaslr"
758 		 * case.
759 		 */
760 		finalize_identity_maps();
761 	}
762 
763 
764 	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
765 	if (IS_ENABLED(CONFIG_X86_64))
766 		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
767 	*virt_addr = random_addr;
768 }
769
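/*
 * choose_random_location() is called from extract_kernel() in misc.c; on
 * return, *output is the (possibly randomized) physical load address and
 * *virt_addr the virtual base used when the relocations are applied.
 */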