xref: /openbmc/linux/arch/x86/boot/compressed/kaslr.c (revision 8ebc80a25f9d9bf7a8e368b266d5b740c485c362)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
27de828dfSKees Cook /*
37de828dfSKees Cook  * kaslr.c
47de828dfSKees Cook  *
57de828dfSKees Cook  * This contains the routines needed to generate a reasonable level of
67de828dfSKees Cook  * entropy to choose a randomized kernel base address offset in support
77de828dfSKees Cook  * of Kernel Address Space Layout Randomization (KASLR). Additionally
87de828dfSKees Cook  * handles walking the physical memory maps (and tracking memory regions
97de828dfSKees Cook  * to avoid) in order to select a physical memory location that can
107de828dfSKees Cook  * contain the entire properly aligned running kernel image.
117de828dfSKees Cook  *
127de828dfSKees Cook  */
13d52e7d5aSBaoquan He 
14d52e7d5aSBaoquan He /*
15d52e7d5aSBaoquan He  * isspace() in linux/ctype.h is expected by next_args() to filter
16d52e7d5aSBaoquan He  * out "space/lf/tab". However, boot/ctype.h conflicts with linux/ctype.h,
17d52e7d5aSBaoquan He  * since isdigit() is implemented in both of them. Hence suppress the
18d52e7d5aSBaoquan He  * inclusion of boot/ctype.h here by pre-defining its include guard.
19d52e7d5aSBaoquan He  */
20d52e7d5aSBaoquan He #define BOOT_CTYPE_H
21d52e7d5aSBaoquan He 
229b238748SKees Cook #include "misc.h"
23dc425a6eSKees Cook #include "error.h"
245b8b9cf7SArnd Bergmann #include "../string.h"
255dc91f2dSBorislav Petkov #include "efi.h"
269b238748SKees Cook 
279b238748SKees Cook #include <generated/compile.h>
289b238748SKees Cook #include <linux/module.h>
299b238748SKees Cook #include <linux/uts.h>
309b238748SKees Cook #include <linux/utsname.h>
31d52e7d5aSBaoquan He #include <linux/ctype.h>
322df8220cSMasahiro Yamada #include <generated/utsversion.h>
339b238748SKees Cook #include <generated/utsrelease.h>
349b238748SKees Cook 
3576167e5cSArvind Sankar #define _SETUP
3676167e5cSArvind Sankar #include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
3776167e5cSArvind Sankar #undef _SETUP
3876167e5cSArvind Sankar 
39d52e7d5aSBaoquan He extern unsigned long get_cmd_line_ptr(void);
40d52e7d5aSBaoquan He 
419b238748SKees Cook /* Simplified build-specific string for starting entropy. */
429b238748SKees Cook static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
439b238748SKees Cook 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
449b238748SKees Cook 
/*
 * Fold @size bytes at @area into @hash, one machine word at a time.
 *
 * Each step rotates the running hash right by 7 bits (an odd count) and
 * XORs in the next word.  Trailing bytes beyond a whole multiple of
 * sizeof(unsigned long) are ignored.  Not cryptographic; only used to
 * stir boot-time entropy sources together.
 */
static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	const unsigned long *word = area;
	size_t nwords = size / sizeof(hash);
	size_t idx;

	for (idx = 0; idx < nwords; idx++) {
		/* Right-rotate by 7, then mix in the next word. */
		hash = (hash >> 7) | (hash << ((sizeof(hash) * 8) - 7));
		hash ^= word[idx];
	}

	return hash;
}
599b238748SKees Cook 
609b238748SKees Cook /* Attempt to create a simple but unpredictable starting entropy. */
/*
 * Create a simple but unpredictable starting entropy value by hashing
 * the compile-time build banner string together with the entire
 * boot_params structure via rotate_xor().  The result is deterministic
 * for a given kernel build and set of boot data.
 */
get_boot_seed(void)61d899a7d1SThomas Garnier static unsigned long get_boot_seed(void)
629b238748SKees Cook {
639b238748SKees Cook 	unsigned long hash = 0;
649b238748SKees Cook 
659b238748SKees Cook 	hash = rotate_xor(hash, build_str, sizeof(build_str));
66*c59843e8SArd Biesheuvel 	hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr));
679b238748SKees Cook 
689b238748SKees Cook 	return hash;
699b238748SKees Cook }
709b238748SKees Cook 
71d899a7d1SThomas Garnier #define KASLR_COMPRESSED_BOOT
72d899a7d1SThomas Garnier #include "../../lib/kaslr.c"
739b238748SKees Cook 
749b238748SKees Cook 
75f2844249SDave Jiang /* Only supporting at most 4 unusable memmap regions with kaslr */
76f2844249SDave Jiang #define MAX_MEMMAP_REGIONS	4
77f2844249SDave Jiang 
78f2844249SDave Jiang static bool memmap_too_large;
79f2844249SDave Jiang 
80d52e7d5aSBaoquan He 
8145128694SArvind Sankar /*
8245128694SArvind Sankar  * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
8345128694SArvind Sankar  * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
8445128694SArvind Sankar  */
853a066990SArvind Sankar static u64 mem_limit;
864cdba14fSBaoquan He 
87690eaa53SChao Fan /* Number of immovable memory regions */
88690eaa53SChao Fan static int num_immovable_mem;
894cdba14fSBaoquan He 
90ed09acdeSKees Cook enum mem_avoid_index {
91ed09acdeSKees Cook 	MEM_AVOID_ZO_RANGE = 0,
92ed09acdeSKees Cook 	MEM_AVOID_INITRD,
93ed09acdeSKees Cook 	MEM_AVOID_CMDLINE,
94ed09acdeSKees Cook 	MEM_AVOID_BOOTPARAMS,
95f2844249SDave Jiang 	MEM_AVOID_MEMMAP_BEGIN,
96f2844249SDave Jiang 	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
97ed09acdeSKees Cook 	MEM_AVOID_MAX,
98ed09acdeSKees Cook };
99ed09acdeSKees Cook 
1009b238748SKees Cook static struct mem_vector mem_avoid[MEM_AVOID_MAX];
1019b238748SKees Cook 
mem_overlaps(struct mem_vector * one,struct mem_vector * two)1029b238748SKees Cook static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
1039b238748SKees Cook {
1049b238748SKees Cook 	/* Item one is entirely before item two. */
1059b238748SKees Cook 	if (one->start + one->size <= two->start)
1069b238748SKees Cook 		return false;
1079b238748SKees Cook 	/* Item one is entirely after item two. */
1089b238748SKees Cook 	if (one->start >= two->start + two->size)
1099b238748SKees Cook 		return false;
1109b238748SKees Cook 	return true;
1119b238748SKees Cook }
1129b238748SKees Cook 
/*
 * Return a pointer to the first non-whitespace character in @str.
 * Non-static because the included lib/cmdline.c expects it to exist.
 */
char *skip_spaces(const char *str)
{
	const char *s = str;

	while (isspace(*s))
		s++;
	return (char *)s;
}
119d52e7d5aSBaoquan He #include "../../../../lib/ctype.c"
120d52e7d5aSBaoquan He #include "../../../../lib/cmdline.c"
121f2844249SDave Jiang 
122199c8471SDan Williams enum parse_mode {
123199c8471SDan Williams 	PARSE_MEMMAP,
124199c8471SDan Williams 	PARSE_EFI,
125199c8471SDan Williams };
126199c8471SDan Williams 
/*
 * Parse one region descriptor from a memmap= or efi_fake_mem= option,
 * of the form nn[KMG]{@|#|$|!}ss[KMG][:attr].  On success, returns 0
 * with *start/*size describing the region; the caller treats
 * *size == 0 as "skip this entry" and *start == 0 as a memory limit
 * rather than an avoid-region.  Returns -EINVAL on parse failure or
 * for "exactmap", which is handled elsewhere.
 */
127f2844249SDave Jiang static int
parse_memmap(char * p,u64 * start,u64 * size,enum parse_mode mode)1283a066990SArvind Sankar parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
129f2844249SDave Jiang {
130f2844249SDave Jiang 	char *oldp;
131f2844249SDave Jiang 
132f2844249SDave Jiang 	if (!p)
133f2844249SDave Jiang 		return -EINVAL;
134f2844249SDave Jiang 
135f2844249SDave Jiang 	/* We don't care about this option here */
136f2844249SDave Jiang 	if (!strncmp(p, "exactmap", 8))
137f2844249SDave Jiang 		return -EINVAL;
138f2844249SDave Jiang 
139f2844249SDave Jiang 	oldp = p;
140d52e7d5aSBaoquan He 	*size = memparse(p, &p);
141f2844249SDave Jiang 	if (p == oldp)
142f2844249SDave Jiang 		return -EINVAL;
143f2844249SDave Jiang 
144f2844249SDave Jiang 	switch (*p) {
145f2844249SDave Jiang 	case '#':
146f2844249SDave Jiang 	case '$':
147f2844249SDave Jiang 	case '!':
148d52e7d5aSBaoquan He 		*start = memparse(p + 1, &p);
149f2844249SDave Jiang 		return 0;
1504cdba14fSBaoquan He 	case '@':
151199c8471SDan Williams 		if (mode == PARSE_MEMMAP) {
152199c8471SDan Williams 			/*
153199c8471SDan Williams 			 * memmap=nn@ss specifies usable region, should
154199c8471SDan Williams 			 * be skipped
155199c8471SDan Williams 			 */
1564cdba14fSBaoquan He 			*size = 0;
157199c8471SDan Williams 		} else {
1583a066990SArvind Sankar 			u64 flags;
159199c8471SDan Williams 
160199c8471SDan Williams 			/*
161199c8471SDan Williams 			 * efi_fake_mem=nn@ss:attr the attr specifies
162199c8471SDan Williams 			 * flags that might imply a soft-reservation.
163199c8471SDan Williams 			 */
164199c8471SDan Williams 			*start = memparse(p + 1, &p);
165199c8471SDan Williams 			if (p && *p == ':') {
166199c8471SDan Williams 				p++;
167199c8471SDan Williams 				if (kstrtoull(p, 0, &flags) < 0)
168199c8471SDan Williams 					*size = 0;
169199c8471SDan Williams 				else if (flags & EFI_MEMORY_SP)
170199c8471SDan Williams 					return 0;
171199c8471SDan Williams 			}
172199c8471SDan Williams 			*size = 0;
173199c8471SDan Williams 		}
174df561f66SGustavo A. R. Silva 		fallthrough;
1754cdba14fSBaoquan He 	default:
1764cdba14fSBaoquan He 		/*
1774cdba14fSBaoquan He 		 * If w/o offset, only size specified, memmap=nn[KMG] has the
1784cdba14fSBaoquan He 		 * same behaviour as mem=nn[KMG]. It limits the max address
1794cdba14fSBaoquan He 		 * system can use. Region above the limit should be avoided.
1804cdba14fSBaoquan He 		 */
1814cdba14fSBaoquan He 		*start = 0;
182f2844249SDave Jiang 		return 0;
183f2844249SDave Jiang 	}
184f2844249SDave Jiang 
185f2844249SDave Jiang 	return -EINVAL;
186f2844249SDave Jiang }
187f2844249SDave Jiang 
/*
 * Walk a comma-separated list of region descriptors and record up to
 * MAX_MEMMAP_REGIONS of them in mem_avoid[].  Entries whose start is 0
 * only lower the global mem_limit.  The index 'i' is static so that
 * several memmap=/efi_fake_mem= options accumulate across calls; if
 * entries are still left once the array is full, KASLR is disabled by
 * setting memmap_too_large.
 */
mem_avoid_memmap(enum parse_mode mode,char * str)188199c8471SDan Williams static void mem_avoid_memmap(enum parse_mode mode, char *str)
189f2844249SDave Jiang {
190d52e7d5aSBaoquan He 	static int i;
191f2844249SDave Jiang 
192d52e7d5aSBaoquan He 	if (i >= MAX_MEMMAP_REGIONS)
193f2844249SDave Jiang 		return;
194f2844249SDave Jiang 
195f2844249SDave Jiang 	while (str && (i < MAX_MEMMAP_REGIONS)) {
196f2844249SDave Jiang 		int rc;
1973a066990SArvind Sankar 		u64 start, size;
198f2844249SDave Jiang 		char *k = strchr(str, ',');
199f2844249SDave Jiang 
200f2844249SDave Jiang 		if (k)
201f2844249SDave Jiang 			*k++ = 0;
202f2844249SDave Jiang 
203199c8471SDan Williams 		rc = parse_memmap(str, &start, &size, mode);
204f2844249SDave Jiang 		if (rc < 0)
205f2844249SDave Jiang 			break;
206f2844249SDave Jiang 		str = k;
2074cdba14fSBaoquan He 
2084cdba14fSBaoquan He 		if (start == 0) {
2094cdba14fSBaoquan He 			/* Store the specified memory limit if size > 0 */
21045128694SArvind Sankar 			if (size > 0 && size < mem_limit)
2114cdba14fSBaoquan He 				mem_limit = size;
2124cdba14fSBaoquan He 
213f2844249SDave Jiang 			continue;
2144cdba14fSBaoquan He 		}
215f2844249SDave Jiang 
216f2844249SDave Jiang 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
217f2844249SDave Jiang 		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
218f2844249SDave Jiang 		i++;
219f2844249SDave Jiang 	}
220f2844249SDave Jiang 
221f2844249SDave Jiang 	/* More than 4 memmaps, fail kaslr */
222f2844249SDave Jiang 	if ((i >= MAX_MEMMAP_REGIONS) && str)
223f2844249SDave Jiang 		memmap_too_large = true;
224f2844249SDave Jiang }
225f2844249SDave Jiang 
2269b912485SBaoquan He /* Store the number of 1GB huge pages which users specified: */
2279b912485SBaoquan He static unsigned long max_gb_huge_pages;
2289b912485SBaoquan He 
/*
 * Handle "hugepagesz=" / "hugepages=" command-line option pairs.  The
 * static flag gbpage_sz remembers whether the most recent hugepagesz=
 * selected 1GB (PUD_SIZE) pages; only in that case does a following
 * hugepages=N record N in max_gb_huge_pages, which is later used by
 * process_gb_huge_pages() to keep candidate slots out of those pages.
 */
parse_gb_huge_pages(char * param,char * val)2299b912485SBaoquan He static void parse_gb_huge_pages(char *param, char *val)
2309b912485SBaoquan He {
2319b912485SBaoquan He 	static bool gbpage_sz;
2329b912485SBaoquan He 	char *p;
2339b912485SBaoquan He 
2349b912485SBaoquan He 	if (!strcmp(param, "hugepagesz")) {
2359b912485SBaoquan He 		p = val;
2369b912485SBaoquan He 		if (memparse(p, &p) != PUD_SIZE) {
2379b912485SBaoquan He 			gbpage_sz = false;
2389b912485SBaoquan He 			return;
2399b912485SBaoquan He 		}
2409b912485SBaoquan He 
2419b912485SBaoquan He 		if (gbpage_sz)
2429b912485SBaoquan He 			warn("Repeatedly set hugeTLB page size of 1G!\n");
2439b912485SBaoquan He 		gbpage_sz = true;
2449b912485SBaoquan He 		return;
2459b912485SBaoquan He 	}
2469b912485SBaoquan He 
2479b912485SBaoquan He 	if (!strcmp(param, "hugepages") && gbpage_sz) {
2489b912485SBaoquan He 		p = val;
2499b912485SBaoquan He 		max_gb_huge_pages = simple_strtoull(p, &p, 0);
2509b912485SBaoquan He 		return;
2519b912485SBaoquan He 	}
2529b912485SBaoquan He }
2539b912485SBaoquan He 
/*
 * Scan the boot command line (stopping at "--") for options that
 * constrain KASLR placement: "memmap=", "mem=", "efi_fake_mem=" and,
 * on 64-bit, the 1GB hugepage options.  Works on a malloc'd copy of
 * the command line because next_arg() modifies the string in place.
 * Updates mem_limit and mem_avoid[] as side effects.
 */
handle_mem_options(void)25444060e8aSChao Fan static void handle_mem_options(void)
255d52e7d5aSBaoquan He {
256d52e7d5aSBaoquan He 	char *args = (char *)get_cmd_line_ptr();
257709709acSArvind Sankar 	size_t len;
258d52e7d5aSBaoquan He 	char *tmp_cmdline;
259d52e7d5aSBaoquan He 	char *param, *val;
2604cdba14fSBaoquan He 	u64 mem_size;
261d52e7d5aSBaoquan He 
262709709acSArvind Sankar 	if (!args)
26344060e8aSChao Fan 		return;
264d52e7d5aSBaoquan He 
26576167e5cSArvind Sankar 	len = strnlen(args, COMMAND_LINE_SIZE-1);
266d52e7d5aSBaoquan He 	tmp_cmdline = malloc(len + 1);
267d52e7d5aSBaoquan He 	if (!tmp_cmdline)
268d52e7d5aSBaoquan He 		error("Failed to allocate space for tmp_cmdline");
269d52e7d5aSBaoquan He 
270d52e7d5aSBaoquan He 	memcpy(tmp_cmdline, args, len);
271d52e7d5aSBaoquan He 	tmp_cmdline[len] = 0;
272d52e7d5aSBaoquan He 	args = tmp_cmdline;
273d52e7d5aSBaoquan He 
274d52e7d5aSBaoquan He 	/* Chew leading spaces */
275d52e7d5aSBaoquan He 	args = skip_spaces(args);
276d52e7d5aSBaoquan He 
277d52e7d5aSBaoquan He 	while (*args) {
278d52e7d5aSBaoquan He 		args = next_arg(args, &param, &val);
279d52e7d5aSBaoquan He 		/* Stop at -- */
280e2ee6173SArvind Sankar 		if (!val && strcmp(param, "--") == 0)
281e2ee6173SArvind Sankar 			break;
282d52e7d5aSBaoquan He 
2834cdba14fSBaoquan He 		if (!strcmp(param, "memmap")) {
284199c8471SDan Williams 			mem_avoid_memmap(PARSE_MEMMAP, val);
28550def269SArvind Sankar 		} else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
286747ff626SBaoquan He 			parse_gb_huge_pages(param, val);
2874cdba14fSBaoquan He 		} else if (!strcmp(param, "mem")) {
2884cdba14fSBaoquan He 			char *p = val;
2894cdba14fSBaoquan He 
2904cdba14fSBaoquan He 			if (!strcmp(p, "nopentium"))
2914cdba14fSBaoquan He 				continue;
2924cdba14fSBaoquan He 			mem_size = memparse(p, &p);
29344060e8aSChao Fan 			if (mem_size == 0)
294e2ee6173SArvind Sankar 				break;
29544060e8aSChao Fan 
29645128694SArvind Sankar 			if (mem_size < mem_limit)
2974cdba14fSBaoquan He 				mem_limit = mem_size;
298199c8471SDan Williams 		} else if (!strcmp(param, "efi_fake_mem")) {
299199c8471SDan Williams 			mem_avoid_memmap(PARSE_EFI, val);
3004cdba14fSBaoquan He 		}
301d52e7d5aSBaoquan He 	}
302d52e7d5aSBaoquan He 
303d52e7d5aSBaoquan He 	free(tmp_cmdline);
30444060e8aSChao Fan 	return;
305d52e7d5aSBaoquan He }
306d52e7d5aSBaoquan He 
3079dc1969cSYinghai Lu /*
30845128694SArvind Sankar  * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
30945128694SArvind Sankar  * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
31045128694SArvind Sankar  *
311ed09acdeSKees Cook  * The mem_avoid array is used to store the ranges that need to be avoided
312ed09acdeSKees Cook  * when KASLR searches for an appropriate random address. We must avoid any
3139dc1969cSYinghai Lu  * regions that are unsafe to overlap with during decompression, and other
314ed09acdeSKees Cook  * things like the initrd, cmdline and boot_params. This comment seeks to
315ed09acdeSKees Cook  * explain mem_avoid as clearly as possible since incorrect mem_avoid
316ed09acdeSKees Cook  * memory ranges lead to really hard to debug boot failures.
3179dc1969cSYinghai Lu  *
318ed09acdeSKees Cook  * The initrd, cmdline, and boot_params are trivial to identify for
319cb18ef0dSKees Cook  * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
320ed09acdeSKees Cook  * MEM_AVOID_BOOTPARAMS respectively below.
3219dc1969cSYinghai Lu  *
322ed09acdeSKees Cook  * What is not obvious how to avoid is the range of memory that is used
323ed09acdeSKees Cook  * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
324ed09acdeSKees Cook  * the compressed kernel (ZO) and its run space, which is used to extract
325ed09acdeSKees Cook  * the uncompressed kernel (VO) and relocs.
3269dc1969cSYinghai Lu  *
327ed09acdeSKees Cook  * ZO's full run size sits against the end of the decompression buffer, so
328ed09acdeSKees Cook  * we can calculate where text, data, bss, etc of ZO are positioned more
329ed09acdeSKees Cook  * easily.
3309dc1969cSYinghai Lu  *
331ed09acdeSKees Cook  * For additional background, the decompression calculations can be found
332ed09acdeSKees Cook  * in header.S, and the memory diagram is based on the one found in misc.c.
3339dc1969cSYinghai Lu  *
334ed09acdeSKees Cook  * The following conditions are already enforced by the image layouts and
335ed09acdeSKees Cook  * associated code:
336ed09acdeSKees Cook  *  - input + input_size >= output + output_size
337ed09acdeSKees Cook  *  - kernel_total_size <= init_size
338ed09acdeSKees Cook  *  - kernel_total_size <= output_size (see Note below)
339ed09acdeSKees Cook  *  - output + init_size >= output + output_size
3409dc1969cSYinghai Lu  *
341ed09acdeSKees Cook  * (Note that kernel_total_size and output_size have no fundamental
342ed09acdeSKees Cook  * relationship, but output_size is passed to choose_random_location
343ed09acdeSKees Cook  * as a maximum of the two. The diagram is showing a case where
344ed09acdeSKees Cook  * kernel_total_size is larger than output_size, but this case is
345ed09acdeSKees Cook  * handled by bumping output_size.)
346ed09acdeSKees Cook  *
347ed09acdeSKees Cook  * The above conditions can be illustrated by a diagram:
348ed09acdeSKees Cook  *
349ed09acdeSKees Cook  * 0   output            input            input+input_size    output+init_size
3509dc1969cSYinghai Lu  * |     |                 |                             |             |
3519dc1969cSYinghai Lu  * |     |                 |                             |             |
352ed09acdeSKees Cook  * |-----|--------|--------|--------------|-----------|--|-------------|
3539dc1969cSYinghai Lu  *                |                       |           |
3549dc1969cSYinghai Lu  *                |                       |           |
355ed09acdeSKees Cook  * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
3569dc1969cSYinghai Lu  *
357ed09acdeSKees Cook  * [output, output+init_size) is the entire memory range used for
358ed09acdeSKees Cook  * extracting the compressed image.
3599dc1969cSYinghai Lu  *
360ed09acdeSKees Cook  * [output, output+kernel_total_size) is the range needed for the
361ed09acdeSKees Cook  * uncompressed kernel (VO) and its run size (bss, brk, etc).
3629dc1969cSYinghai Lu  *
363ed09acdeSKees Cook  * [output, output+output_size) is VO plus relocs (i.e. the entire
364ed09acdeSKees Cook  * uncompressed payload contained by ZO). This is the area of the buffer
365ed09acdeSKees Cook  * written to during decompression.
3669dc1969cSYinghai Lu  *
367ed09acdeSKees Cook  * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
368ed09acdeSKees Cook  * range of the copied ZO and decompression code. (i.e. the range
369ed09acdeSKees Cook  * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
370ed09acdeSKees Cook  *
371ed09acdeSKees Cook  * [input, input+input_size) is the original copied compressed image (ZO)
372ed09acdeSKees Cook  * (i.e. it does not include its run size). This range must be avoided
373ed09acdeSKees Cook  * because it contains the data used for decompression.
374ed09acdeSKees Cook  *
375ed09acdeSKees Cook  * [input+input_size, output+init_size) is [_text, _end) for ZO. This
376ed09acdeSKees Cook  * range includes ZO's heap and stack, and must be avoided since it
377ed09acdeSKees Cook  * performs the decompression.
378ed09acdeSKees Cook  *
379ed09acdeSKees Cook  * Since the above two ranges need to be avoided and they are adjacent,
380ed09acdeSKees Cook  * they can be merged, resulting in: [input, output+init_size) which
381ed09acdeSKees Cook  * becomes the MEM_AVOID_ZO_RANGE below.
3829dc1969cSYinghai Lu  */
/*
 * Fill in the fixed entries of mem_avoid[]: the decompression work area
 * (ZO range), the initrd, the kernel command line and boot_params, plus
 * any user-specified memmap regions, and count the immovable memory
 * regions.  See the large comment above for how the ZO range is derived.
 */
mem_avoid_init(unsigned long input,unsigned long input_size,unsigned long output)3839b238748SKees Cook static void mem_avoid_init(unsigned long input, unsigned long input_size,
3849dc1969cSYinghai Lu 			   unsigned long output)
3859b238748SKees Cook {
386*c59843e8SArd Biesheuvel 	unsigned long init_size = boot_params_ptr->hdr.init_size;
3879b238748SKees Cook 	u64 initrd_start, initrd_size;
388709709acSArvind Sankar 	unsigned long cmd_line, cmd_line_size;
3899b238748SKees Cook 
3909b238748SKees Cook 	/*
3919b238748SKees Cook 	 * Avoid the region that is unsafe to overlap during
3929dc1969cSYinghai Lu 	 * decompression.
3939b238748SKees Cook 	 */
394ed09acdeSKees Cook 	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
395ed09acdeSKees Cook 	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
3969b238748SKees Cook 
3979b238748SKees Cook 	/* Avoid initrd. */
	/* ext_ramdisk_* carry the upper 32 bits of the 64-bit address/size. */
398*c59843e8SArd Biesheuvel 	initrd_start  = (u64)boot_params_ptr->ext_ramdisk_image << 32;
399*c59843e8SArd Biesheuvel 	initrd_start |= boot_params_ptr->hdr.ramdisk_image;
400*c59843e8SArd Biesheuvel 	initrd_size  = (u64)boot_params_ptr->ext_ramdisk_size << 32;
401*c59843e8SArd Biesheuvel 	initrd_size |= boot_params_ptr->hdr.ramdisk_size;
402ed09acdeSKees Cook 	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
403ed09acdeSKees Cook 	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
4043a94707dSKees Cook 	/* No need to set mapping for initrd, it will be handled in VO. */
4059b238748SKees Cook 
4069b238748SKees Cook 	/* Avoid kernel command line. */
407709709acSArvind Sankar 	cmd_line = get_cmd_line_ptr();
4089b238748SKees Cook 	/* Calculate size of cmd_line. */
409709709acSArvind Sankar 	if (cmd_line) {
41076167e5cSArvind Sankar 		cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
411ed09acdeSKees Cook 		mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
412ed09acdeSKees Cook 		mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
413709709acSArvind Sankar 	}
4149b238748SKees Cook 
415ed09acdeSKees Cook 	/* Avoid boot parameters. */
416*c59843e8SArd Biesheuvel 	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr;
417*c59843e8SArd Biesheuvel 	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr);
4183a94707dSKees Cook 
4193a94707dSKees Cook 	/* We don't need to set a mapping for setup_data. */
4203a94707dSKees Cook 
421f2844249SDave Jiang 	/* Mark the memmap regions we need to avoid */
422747ff626SBaoquan He 	handle_mem_options();
423f2844249SDave Jiang 
424690eaa53SChao Fan 	/* Enumerate the immovable memory regions */
425690eaa53SChao Fan 	num_immovable_mem = count_immovable_mem_regions();
4269b238748SKees Cook }
4279b238748SKees Cook 
42806486d6cSKees Cook /*
42906486d6cSKees Cook  * Does this memory vector overlap a known avoided area? If so, record the
43006486d6cSKees Cook  * overlap region with the lowest address.
43106486d6cSKees Cook  */
/*
 * Checks both the static mem_avoid[] table and every entry of the
 * boot_params setup_data linked list.  Returns true and fills *overlap
 * with the lowest-starting conflicting range; returns false if @img is
 * free of all avoided areas.
 */
mem_avoid_overlap(struct mem_vector * img,struct mem_vector * overlap)43206486d6cSKees Cook static bool mem_avoid_overlap(struct mem_vector *img,
43306486d6cSKees Cook 			      struct mem_vector *overlap)
4349b238748SKees Cook {
4359b238748SKees Cook 	int i;
4369b238748SKees Cook 	struct setup_data *ptr;
4370eb1a8afSArvind Sankar 	u64 earliest = img->start + img->size;
43806486d6cSKees Cook 	bool is_overlapping = false;
4399b238748SKees Cook 
4409b238748SKees Cook 	for (i = 0; i < MEM_AVOID_MAX; i++) {
44106486d6cSKees Cook 		if (mem_overlaps(img, &mem_avoid[i]) &&
44206486d6cSKees Cook 		    mem_avoid[i].start < earliest) {
44306486d6cSKees Cook 			*overlap = mem_avoid[i];
4446daa2ec0SBaoquan He 			earliest = overlap->start;
44506486d6cSKees Cook 			is_overlapping = true;
44606486d6cSKees Cook 		}
4479b238748SKees Cook 	}
4489b238748SKees Cook 
4499b238748SKees Cook 	/* Avoid all entries in the setup_data linked list. */
450*c59843e8SArd Biesheuvel 	ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data;
4519b238748SKees Cook 	while (ptr) {
4529b238748SKees Cook 		struct mem_vector avoid;
4539b238748SKees Cook 
4549b238748SKees Cook 		avoid.start = (unsigned long)ptr;
4559b238748SKees Cook 		avoid.size = sizeof(*ptr) + ptr->len;
4569b238748SKees Cook 
45706486d6cSKees Cook 		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
45806486d6cSKees Cook 			*overlap = avoid;
4596daa2ec0SBaoquan He 			earliest = overlap->start;
46006486d6cSKees Cook 			is_overlapping = true;
46106486d6cSKees Cook 		}
4629b238748SKees Cook 
		/* A SETUP_INDIRECT entry also describes a payload elsewhere; avoid that too. */
463b3c72fc9SDaniel Kiper 		if (ptr->type == SETUP_INDIRECT &&
464b3c72fc9SDaniel Kiper 		    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
465b3c72fc9SDaniel Kiper 			avoid.start = ((struct setup_indirect *)ptr->data)->addr;
466b3c72fc9SDaniel Kiper 			avoid.size = ((struct setup_indirect *)ptr->data)->len;
467b3c72fc9SDaniel Kiper 
468b3c72fc9SDaniel Kiper 			if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
469b3c72fc9SDaniel Kiper 				*overlap = avoid;
470b3c72fc9SDaniel Kiper 				earliest = overlap->start;
471b3c72fc9SDaniel Kiper 				is_overlapping = true;
472b3c72fc9SDaniel Kiper 			}
473b3c72fc9SDaniel Kiper 		}
474b3c72fc9SDaniel Kiper 
4759b238748SKees Cook 		ptr = (struct setup_data *)(unsigned long)ptr->next;
4769b238748SKees Cook 	}
4779b238748SKees Cook 
47806486d6cSKees Cook 	return is_overlapping;
4799b238748SKees Cook }
4809b238748SKees Cook 
481c401cf15SBaoquan He struct slot_area {
4820eb1a8afSArvind Sankar 	u64 addr;
483d6d0f36cSArvind Sankar 	unsigned long num;
484c401cf15SBaoquan He };
485c401cf15SBaoquan He 
486c401cf15SBaoquan He #define MAX_SLOT_AREA 100
487c401cf15SBaoquan He 
488c401cf15SBaoquan He static struct slot_area slot_areas[MAX_SLOT_AREA];
489d6d0f36cSArvind Sankar static unsigned int slot_area_index;
4909b238748SKees Cook static unsigned long slot_max;
4919b238748SKees Cook 
store_slot_info(struct mem_vector * region,unsigned long image_size)492c401cf15SBaoquan He static void store_slot_info(struct mem_vector *region, unsigned long image_size)
493c401cf15SBaoquan He {
494c401cf15SBaoquan He 	struct slot_area slot_area;
495c401cf15SBaoquan He 
496c401cf15SBaoquan He 	if (slot_area_index == MAX_SLOT_AREA)
497c401cf15SBaoquan He 		return;
498c401cf15SBaoquan He 
499c401cf15SBaoquan He 	slot_area.addr = region->start;
50046a5b29aSArvind Sankar 	slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
501c401cf15SBaoquan He 
502c401cf15SBaoquan He 	slot_areas[slot_area_index++] = slot_area;
503c401cf15SBaoquan He 	slot_max += slot_area.num;
504c401cf15SBaoquan He }
505c401cf15SBaoquan He 
5069b912485SBaoquan He /*
5079b912485SBaoquan He  * Skip as many 1GB huge pages as possible in the passed region
5089b912485SBaoquan He  * according to the number which users specified:
5099b912485SBaoquan He  */
/*
 * The region is split around its PUD-aligned middle: the head and tail
 * parts are stored as slot areas when they can still hold the kernel
 * image, and max_gb_huge_pages is decremented by the number of 1GB
 * pages actually skipped.  Without CONFIG_X86_64 or any requested 1GB
 * pages, the whole region is stored unchanged.
 */
5109b912485SBaoquan He static void
process_gb_huge_pages(struct mem_vector * region,unsigned long image_size)5119b912485SBaoquan He process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
5129b912485SBaoquan He {
5130eb1a8afSArvind Sankar 	u64 pud_start, pud_end;
5140eb1a8afSArvind Sankar 	unsigned long gb_huge_pages;
5159b912485SBaoquan He 	struct mem_vector tmp;
5169b912485SBaoquan He 
51750def269SArvind Sankar 	if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
5189b912485SBaoquan He 		store_slot_info(region, image_size);
5199b912485SBaoquan He 		return;
5209b912485SBaoquan He 	}
5219b912485SBaoquan He 
522be9e8d95SArvind Sankar 	/* Are there any 1GB pages in the region? */
523be9e8d95SArvind Sankar 	pud_start = ALIGN(region->start, PUD_SIZE);
524be9e8d95SArvind Sankar 	pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
5259b912485SBaoquan He 
5269b912485SBaoquan He 	/* No good 1GB huge pages found: */
527be9e8d95SArvind Sankar 	if (pud_start >= pud_end) {
5289b912485SBaoquan He 		store_slot_info(region, image_size);
5299b912485SBaoquan He 		return;
5309b912485SBaoquan He 	}
5319b912485SBaoquan He 
532be9e8d95SArvind Sankar 	/* Check if the head part of the region is usable. */
533be9e8d95SArvind Sankar 	if (pud_start >= region->start + image_size) {
5349b912485SBaoquan He 		tmp.start = region->start;
535be9e8d95SArvind Sankar 		tmp.size = pud_start - region->start;
5369b912485SBaoquan He 		store_slot_info(&tmp, image_size);
5379b912485SBaoquan He 	}
5389b912485SBaoquan He 
539be9e8d95SArvind Sankar 	/* Skip the good 1GB pages. */
540be9e8d95SArvind Sankar 	gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
541be9e8d95SArvind Sankar 	if (gb_huge_pages > max_gb_huge_pages) {
542be9e8d95SArvind Sankar 		pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
543be9e8d95SArvind Sankar 		max_gb_huge_pages = 0;
544be9e8d95SArvind Sankar 	} else {
545be9e8d95SArvind Sankar 		max_gb_huge_pages -= gb_huge_pages;
546be9e8d95SArvind Sankar 	}
547be9e8d95SArvind Sankar 
548be9e8d95SArvind Sankar 	/* Check if the tail part of the region is usable. */
549be9e8d95SArvind Sankar 	if (region->start + region->size >= pud_end + image_size) {
550be9e8d95SArvind Sankar 		tmp.start = pud_end;
551be9e8d95SArvind Sankar 		tmp.size = region->start + region->size - pud_end;
5529b912485SBaoquan He 		store_slot_info(&tmp, image_size);
5539b912485SBaoquan He 	}
5549b912485SBaoquan He }
5559b912485SBaoquan He 
/*
 * Choose one random slot index in [0, slot_max) and convert it into a
 * physical address by walking slot_areas[]: each area contributes
 * 'num' CONFIG_PHYSICAL_ALIGN-spaced addresses starting at 'addr'.
 * Returns 0 when no slots have been collected.
 */
slots_fetch_random(void)5560eb1a8afSArvind Sankar static u64 slots_fetch_random(void)
5579b238748SKees Cook {
558ed9f007eSKees Cook 	unsigned long slot;
559d6d0f36cSArvind Sankar 	unsigned int i;
560ed9f007eSKees Cook 
5619b238748SKees Cook 	/* Handle case of no slots stored. */
5629b238748SKees Cook 	if (slot_max == 0)
5639b238748SKees Cook 		return 0;
5649b238748SKees Cook 
565d899a7d1SThomas Garnier 	slot = kaslr_get_random_long("Physical") % slot_max;
566ed9f007eSKees Cook 
567ed9f007eSKees Cook 	for (i = 0; i < slot_area_index; i++) {
568ed9f007eSKees Cook 		if (slot >= slot_areas[i].num) {
569ed9f007eSKees Cook 			slot -= slot_areas[i].num;
570ed9f007eSKees Cook 			continue;
571ed9f007eSKees Cook 		}
5720eb1a8afSArvind Sankar 		return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
573ed9f007eSKees Cook 	}
574ed9f007eSKees Cook 
575ed9f007eSKees Cook 	if (i == slot_area_index)
576ed9f007eSKees Cook 		debug_putstr("slots_fetch_random() failed!?\n");
577ed9f007eSKees Cook 	return 0;
5789b238748SKees Cook }
5799b238748SKees Cook 
/*
 * Split @entry into chunks that avoid all mem_avoid[] ranges and store
 * every chunk large enough for the decompressed kernel (>= image_size)
 * as candidate slot areas.  Works lowest-address first: each iteration
 * finds the earliest overlapping avoid-region, stores the usable head
 * (if any), and resumes scanning past that region.
 */
__process_mem_region(struct mem_vector * entry,unsigned long minimum,unsigned long image_size)580690eaa53SChao Fan static void __process_mem_region(struct mem_vector *entry,
5819b238748SKees Cook 				 unsigned long minimum,
5829b238748SKees Cook 				 unsigned long image_size)
5839b238748SKees Cook {
584ed9f007eSKees Cook 	struct mem_vector region, overlap;
5850eb1a8afSArvind Sankar 	u64 region_end;
5869b238748SKees Cook 
587bf457be1SArvind Sankar 	/* Enforce minimum and memory limit. */
5883a066990SArvind Sankar 	region.start = max_t(u64, entry->start, minimum);
589bf457be1SArvind Sankar 	region_end = min(entry->start + entry->size, mem_limit);
5909b238748SKees Cook 
591ed9f007eSKees Cook 	/* Give up if slot area array is full. */
592ed9f007eSKees Cook 	while (slot_area_index < MAX_SLOT_AREA) {
593ed9f007eSKees Cook 		/* Potentially raise address to meet alignment needs. */
5949b238748SKees Cook 		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
5959b238748SKees Cook 
59627aac205SBaoquan He 		/* Did we raise the address above the passed in memory entry? */
597bf457be1SArvind Sankar 		if (region.start > region_end)
5989b238748SKees Cook 			return;
5999b238748SKees Cook 
6009b238748SKees Cook 		/* Reduce size by any delta from the original address. */
601bf457be1SArvind Sankar 		region.size = region_end - region.start;
6029b238748SKees Cook 
603ed9f007eSKees Cook 		/* Return if region can't contain decompressed kernel */
604ed9f007eSKees Cook 		if (region.size < image_size)
605ed9f007eSKees Cook 			return;
606ed9f007eSKees Cook 
607ed9f007eSKees Cook 		/* If nothing overlaps, store the region and return. */
608ed9f007eSKees Cook 		if (!mem_avoid_overlap(&region, &overlap)) {
609747ff626SBaoquan He 			process_gb_huge_pages(&region, image_size);
610ed9f007eSKees Cook 			return;
611ed9f007eSKees Cook 		}
612ed9f007eSKees Cook 
613ed9f007eSKees Cook 		/* Store beginning of region if holds at least image_size. */
6148d1cf859SArvind Sankar 		if (overlap.start >= region.start + image_size) {
615ef7b07d5SArvind Sankar 			region.size = overlap.start - region.start;
616ef7b07d5SArvind Sankar 			process_gb_huge_pages(&region, image_size);
617ed9f007eSKees Cook 		}
618ed9f007eSKees Cook 
619ed9f007eSKees Cook 		/* Clip off the overlapping region and start over. */
620ed9f007eSKees Cook 		region.start = overlap.start + overlap.size;
6219b238748SKees Cook 	}
6229b238748SKees Cook }
6239b238748SKees Cook 
process_mem_region(struct mem_vector * region,unsigned long minimum,unsigned long image_size)624690eaa53SChao Fan static bool process_mem_region(struct mem_vector *region,
625e4cb955bSArvind Sankar 			       unsigned long minimum,
626e4cb955bSArvind Sankar 			       unsigned long image_size)
627690eaa53SChao Fan {
628690eaa53SChao Fan 	int i;
629690eaa53SChao Fan 	/*
630690eaa53SChao Fan 	 * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
631690eaa53SChao Fan 	 * use @region directly.
632690eaa53SChao Fan 	 */
633690eaa53SChao Fan 	if (!num_immovable_mem) {
634690eaa53SChao Fan 		__process_mem_region(region, minimum, image_size);
635690eaa53SChao Fan 
636690eaa53SChao Fan 		if (slot_area_index == MAX_SLOT_AREA) {
637690eaa53SChao Fan 			debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
63821d6a7dcSJiapeng Chong 			return true;
639690eaa53SChao Fan 		}
64021d6a7dcSJiapeng Chong 		return false;
641690eaa53SChao Fan 	}
642690eaa53SChao Fan 
64382df8261SBorislav Petkov #if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
644690eaa53SChao Fan 	/*
645690eaa53SChao Fan 	 * If immovable memory found, filter the intersection between
646690eaa53SChao Fan 	 * immovable memory and @region.
647690eaa53SChao Fan 	 */
648690eaa53SChao Fan 	for (i = 0; i < num_immovable_mem; i++) {
6493a066990SArvind Sankar 		u64 start, end, entry_end, region_end;
650690eaa53SChao Fan 		struct mem_vector entry;
651690eaa53SChao Fan 
652690eaa53SChao Fan 		if (!mem_overlaps(region, &immovable_mem[i]))
653690eaa53SChao Fan 			continue;
654690eaa53SChao Fan 
655690eaa53SChao Fan 		start = immovable_mem[i].start;
656690eaa53SChao Fan 		end = start + immovable_mem[i].size;
657690eaa53SChao Fan 		region_end = region->start + region->size;
658690eaa53SChao Fan 
659690eaa53SChao Fan 		entry.start = clamp(region->start, start, end);
660690eaa53SChao Fan 		entry_end = clamp(region_end, start, end);
661690eaa53SChao Fan 		entry.size = entry_end - entry.start;
662690eaa53SChao Fan 
663690eaa53SChao Fan 		__process_mem_region(&entry, minimum, image_size);
664690eaa53SChao Fan 
665690eaa53SChao Fan 		if (slot_area_index == MAX_SLOT_AREA) {
666690eaa53SChao Fan 			debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
6675b3fd8aaSJing Yangyang 			return true;
668690eaa53SChao Fan 		}
669690eaa53SChao Fan 	}
670690eaa53SChao Fan #endif
671ee92fa03SJiapeng Chong 	return false;
672690eaa53SChao Fan }
673690eaa53SChao Fan 
674c05cd797SBaoquan He #ifdef CONFIG_EFI
6753fd1239aSKirill A. Shutemov 
6763fd1239aSKirill A. Shutemov /*
6773fd1239aSKirill A. Shutemov  * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
6783fd1239aSKirill A. Shutemov  * guaranteed to be free.
6793fd1239aSKirill A. Shutemov  *
6803fd1239aSKirill A. Shutemov  * Pick free memory more conservatively than the EFI spec allows: according to
6813fd1239aSKirill A. Shutemov  * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus
6823fd1239aSKirill A. Shutemov  * available to place the kernel image into, but in practice there's firmware
6833fd1239aSKirill A. Shutemov  * where using that memory leads to crashes. Buggy vendor EFI code registers
6843fd1239aSKirill A. Shutemov  * for an event that triggers on SetVirtualAddressMap(). The handler assumes
6853fd1239aSKirill A. Shutemov  * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which
6863fd1239aSKirill A. Shutemov  * is probably true for Windows.
6873fd1239aSKirill A. Shutemov  *
6883fd1239aSKirill A. Shutemov  * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap().
6893fd1239aSKirill A. Shutemov  */
memory_type_is_free(efi_memory_desc_t * md)6903fd1239aSKirill A. Shutemov static inline bool memory_type_is_free(efi_memory_desc_t *md)
6913fd1239aSKirill A. Shutemov {
6923fd1239aSKirill A. Shutemov 	if (md->type == EFI_CONVENTIONAL_MEMORY)
6933fd1239aSKirill A. Shutemov 		return true;
6943fd1239aSKirill A. Shutemov 
6953fd1239aSKirill A. Shutemov 	if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
6963fd1239aSKirill A. Shutemov 	    md->type == EFI_UNACCEPTED_MEMORY)
6973fd1239aSKirill A. Shutemov 		    return true;
6983fd1239aSKirill A. Shutemov 
6993fd1239aSKirill A. Shutemov 	return false;
7003fd1239aSKirill A. Shutemov }
7013fd1239aSKirill A. Shutemov 
702c05cd797SBaoquan He /*
70308705365SArvind Sankar  * Returns true if we processed the EFI memmap, which we prefer over the E820
70408705365SArvind Sankar  * table if it is available.
705c05cd797SBaoquan He  */
706c05cd797SBaoquan He static bool
process_efi_entries(unsigned long minimum,unsigned long image_size)707c05cd797SBaoquan He process_efi_entries(unsigned long minimum, unsigned long image_size)
708c05cd797SBaoquan He {
709*c59843e8SArd Biesheuvel 	struct efi_info *e = &boot_params_ptr->efi_info;
710c05cd797SBaoquan He 	bool efi_mirror_found = false;
711c05cd797SBaoquan He 	struct mem_vector region;
712c05cd797SBaoquan He 	efi_memory_desc_t *md;
713c05cd797SBaoquan He 	unsigned long pmap;
714c05cd797SBaoquan He 	char *signature;
715c05cd797SBaoquan He 	u32 nr_desc;
716c05cd797SBaoquan He 	int i;
717c05cd797SBaoquan He 
718c05cd797SBaoquan He 	signature = (char *)&e->efi_loader_signature;
719c05cd797SBaoquan He 	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
720c05cd797SBaoquan He 	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
721c05cd797SBaoquan He 		return false;
722c05cd797SBaoquan He 
723c05cd797SBaoquan He #ifdef CONFIG_X86_32
724c05cd797SBaoquan He 	/* Can't handle data above 4GB at this time */
725c05cd797SBaoquan He 	if (e->efi_memmap_hi) {
726c05cd797SBaoquan He 		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
727c05cd797SBaoquan He 		return false;
728c05cd797SBaoquan He 	}
729c05cd797SBaoquan He 	pmap =  e->efi_memmap;
730c05cd797SBaoquan He #else
731c05cd797SBaoquan He 	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
732c05cd797SBaoquan He #endif
733c05cd797SBaoquan He 
734c05cd797SBaoquan He 	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
735c05cd797SBaoquan He 	for (i = 0; i < nr_desc; i++) {
736c05cd797SBaoquan He 		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
737c05cd797SBaoquan He 		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
7380982adc7SNaoya Horiguchi 			efi_mirror_found = true;
7390982adc7SNaoya Horiguchi 			break;
7400982adc7SNaoya Horiguchi 		}
7410982adc7SNaoya Horiguchi 	}
7420982adc7SNaoya Horiguchi 
7430982adc7SNaoya Horiguchi 	for (i = 0; i < nr_desc; i++) {
7440982adc7SNaoya Horiguchi 		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
7450982adc7SNaoya Horiguchi 
7463fd1239aSKirill A. Shutemov 		if (!memory_type_is_free(md))
7470982adc7SNaoya Horiguchi 			continue;
7480982adc7SNaoya Horiguchi 
749262b45aeSDan Williams 		if (efi_soft_reserve_enabled() &&
750262b45aeSDan Williams 		    (md->attribute & EFI_MEMORY_SP))
751262b45aeSDan Williams 			continue;
752262b45aeSDan Williams 
7530982adc7SNaoya Horiguchi 		if (efi_mirror_found &&
7540982adc7SNaoya Horiguchi 		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
7550982adc7SNaoya Horiguchi 			continue;
7560982adc7SNaoya Horiguchi 
757c05cd797SBaoquan He 		region.start = md->phys_addr;
758c05cd797SBaoquan He 		region.size = md->num_pages << EFI_PAGE_SHIFT;
759690eaa53SChao Fan 		if (process_mem_region(&region, minimum, image_size))
760c05cd797SBaoquan He 			break;
761c05cd797SBaoquan He 	}
7620982adc7SNaoya Horiguchi 	return true;
763c05cd797SBaoquan He }
764c05cd797SBaoquan He #else
/* Stub for !CONFIG_EFI: report that no EFI memmap was processed, so the
 * caller falls back to the e820 table. */
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	return false;
}
770c05cd797SBaoquan He #endif
771c05cd797SBaoquan He 
process_e820_entries(unsigned long minimum,unsigned long image_size)772f62995c9SBaoquan He static void process_e820_entries(unsigned long minimum,
773071a7493SBaoquan He 				 unsigned long image_size)
7749b238748SKees Cook {
7759b238748SKees Cook 	int i;
77687891b01SBaoquan He 	struct mem_vector region;
777f62995c9SBaoquan He 	struct boot_e820_entry *entry;
7789b238748SKees Cook 
779f62995c9SBaoquan He 	/* Verify potential e820 positions, appending to slots list. */
780*c59843e8SArd Biesheuvel 	for (i = 0; i < boot_params_ptr->e820_entries; i++) {
781*c59843e8SArd Biesheuvel 		entry = &boot_params_ptr->e820_table[i];
782f62995c9SBaoquan He 		/* Skip non-RAM entries. */
783f62995c9SBaoquan He 		if (entry->type != E820_TYPE_RAM)
784f62995c9SBaoquan He 			continue;
78587891b01SBaoquan He 		region.start = entry->addr;
78687891b01SBaoquan He 		region.size = entry->size;
787690eaa53SChao Fan 		if (process_mem_region(&region, minimum, image_size))
788f62995c9SBaoquan He 			break;
789f62995c9SBaoquan He 	}
790f62995c9SBaoquan He }
791f62995c9SBaoquan He 
find_random_phys_addr(unsigned long minimum,unsigned long image_size)792f62995c9SBaoquan He static unsigned long find_random_phys_addr(unsigned long minimum,
793f62995c9SBaoquan He 					   unsigned long image_size)
794f62995c9SBaoquan He {
795f49236aeSArvind Sankar 	u64 phys_addr;
796f49236aeSArvind Sankar 
79745128694SArvind Sankar 	/* Bail out early if it's impossible to succeed. */
79845128694SArvind Sankar 	if (minimum + image_size > mem_limit)
79945128694SArvind Sankar 		return 0;
80045128694SArvind Sankar 
801f2844249SDave Jiang 	/* Check if we had too many memmaps. */
802f2844249SDave Jiang 	if (memmap_too_large) {
803c05cd797SBaoquan He 		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
804f2844249SDave Jiang 		return 0;
805f2844249SDave Jiang 	}
806f2844249SDave Jiang 
8074268b4daSArvind Sankar 	if (!process_efi_entries(minimum, image_size))
808f62995c9SBaoquan He 		process_e820_entries(minimum, image_size);
8094268b4daSArvind Sankar 
810f49236aeSArvind Sankar 	phys_addr = slots_fetch_random();
811f49236aeSArvind Sankar 
812f49236aeSArvind Sankar 	/* Perform a final check to make sure the address is in range. */
813f49236aeSArvind Sankar 	if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
814f49236aeSArvind Sankar 		warn("Invalid physical address chosen!\n");
815f49236aeSArvind Sankar 		return 0;
816f49236aeSArvind Sankar 	}
817f49236aeSArvind Sankar 
818f49236aeSArvind Sankar 	return (unsigned long)phys_addr;
8199b238748SKees Cook }
8209b238748SKees Cook 
find_random_virt_addr(unsigned long minimum,unsigned long image_size)821071a7493SBaoquan He static unsigned long find_random_virt_addr(unsigned long minimum,
822071a7493SBaoquan He 					   unsigned long image_size)
823071a7493SBaoquan He {
824071a7493SBaoquan He 	unsigned long slots, random_addr;
825071a7493SBaoquan He 
826071a7493SBaoquan He 	/*
827071a7493SBaoquan He 	 * There are how many CONFIG_PHYSICAL_ALIGN-sized slots
828071a7493SBaoquan He 	 * that can hold image_size within the range of minimum to
829071a7493SBaoquan He 	 * KERNEL_IMAGE_SIZE?
830071a7493SBaoquan He 	 */
831eb38be6dSArvind Sankar 	slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
832071a7493SBaoquan He 
833d899a7d1SThomas Garnier 	random_addr = kaslr_get_random_long("Virtual") % slots;
834071a7493SBaoquan He 
835071a7493SBaoquan He 	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
836071a7493SBaoquan He }
837071a7493SBaoquan He 
838549f90dbSBorislav Petkov /*
839549f90dbSBorislav Petkov  * Since this function examines addresses much more numerically,
840549f90dbSBorislav Petkov  * it takes the input and output pointers as 'unsigned long'.
841549f90dbSBorislav Petkov  */
choose_random_location(unsigned long input,unsigned long input_size,unsigned long * output,unsigned long output_size,unsigned long * virt_addr)8428391c73cSBaoquan He void choose_random_location(unsigned long input,
8439b238748SKees Cook 			    unsigned long input_size,
8448391c73cSBaoquan He 			    unsigned long *output,
8458391c73cSBaoquan He 			    unsigned long output_size,
8468391c73cSBaoquan He 			    unsigned long *virt_addr)
8479b238748SKees Cook {
848e066cc47SYinghai Lu 	unsigned long random_addr, min_addr;
8499b238748SKees Cook 
8509b238748SKees Cook 	if (cmdline_find_option_bool("nokaslr")) {
8510f8ede1bSKees Cook 		warn("KASLR disabled: 'nokaslr' on cmdline.");
8528391c73cSBaoquan He 		return;
8539b238748SKees Cook 	}
8549b238748SKees Cook 
855*c59843e8SArd Biesheuvel 	boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
8569b238748SKees Cook 
85745128694SArvind Sankar 	if (IS_ENABLED(CONFIG_X86_32))
85845128694SArvind Sankar 		mem_limit = KERNEL_IMAGE_SIZE;
85945128694SArvind Sankar 	else
86045128694SArvind Sankar 		mem_limit = MAXMEM;
86145128694SArvind Sankar 
8629b238748SKees Cook 	/* Record the various known unsafe memory ranges. */
8638391c73cSBaoquan He 	mem_avoid_init(input, input_size, *output);
8649b238748SKees Cook 
865e066cc47SYinghai Lu 	/*
866e066cc47SYinghai Lu 	 * Low end of the randomization range should be the
867e066cc47SYinghai Lu 	 * smaller of 512M or the initial kernel image
868e066cc47SYinghai Lu 	 * location:
869e066cc47SYinghai Lu 	 */
870e066cc47SYinghai Lu 	min_addr = min(*output, 512UL << 20);
87145128694SArvind Sankar 	/* Make sure minimum is aligned. */
87245128694SArvind Sankar 	min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
873e066cc47SYinghai Lu 
874c05cd797SBaoquan He 	/* Walk available memory entries to find a random address. */
875e066cc47SYinghai Lu 	random_addr = find_random_phys_addr(min_addr, output_size);
8769016875dSKees Cook 	if (!random_addr) {
877f2844249SDave Jiang 		warn("Physical KASLR disabled: no suitable memory region!");
8788391c73cSBaoquan He 	} else {
8798391c73cSBaoquan He 		/* Update the new physical address location. */
8808570978eSJoerg Roedel 		if (*output != random_addr)
8818391c73cSBaoquan He 			*output = random_addr;
8829b238748SKees Cook 	}
883da63b6b2SBaoquan He 
8848391c73cSBaoquan He 
8858391c73cSBaoquan He 	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
8868391c73cSBaoquan He 	if (IS_ENABLED(CONFIG_X86_64))
8878391c73cSBaoquan He 		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
8888391c73cSBaoquan He 	*virt_addr = random_addr;
8899b238748SKees Cook }
890