xref: /openbmc/linux/arch/riscv/kernel/elf_kexec.c (revision a39416d8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Load ELF vmlinux file for the kexec_file_load syscall.
4  *
5  * Copyright (C) 2021 Huawei Technologies Co, Ltd.
6  *
7  * Author: Liao Chang (liaochang1@huawei.com)
8  *
9  * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
10  * for kernel.
11  */
12 
13 #define pr_fmt(fmt)	"kexec_image: " fmt
14 
15 #include <linux/elf.h>
16 #include <linux/kexec.h>
17 #include <linux/slab.h>
18 #include <linux/of.h>
19 #include <linux/libfdt.h>
20 #include <linux/types.h>
21 #include <linux/memblock.h>
22 #include <asm/setup.h>
23 
arch_kimage_file_post_load_cleanup(struct kimage * image)24 int arch_kimage_file_post_load_cleanup(struct kimage *image)
25 {
26 	kvfree(image->arch.fdt);
27 	image->arch.fdt = NULL;
28 
29 	vfree(image->elf_headers);
30 	image->elf_headers = NULL;
31 	image->elf_headers_sz = 0;
32 
33 	return kexec_image_post_load_cleanup_default(image);
34 }
35 
riscv_kexec_elf_load(struct kimage * image,struct elfhdr * ehdr,struct kexec_elf_info * elf_info,unsigned long old_pbase,unsigned long new_pbase)36 static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
37 				struct kexec_elf_info *elf_info, unsigned long old_pbase,
38 				unsigned long new_pbase)
39 {
40 	int i;
41 	int ret = 0;
42 	size_t size;
43 	struct kexec_buf kbuf;
44 	const struct elf_phdr *phdr;
45 
46 	kbuf.image = image;
47 
48 	for (i = 0; i < ehdr->e_phnum; i++) {
49 		phdr = &elf_info->proghdrs[i];
50 		if (phdr->p_type != PT_LOAD)
51 			continue;
52 
53 		size = phdr->p_filesz;
54 		if (size > phdr->p_memsz)
55 			size = phdr->p_memsz;
56 
57 		kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
58 		kbuf.bufsz = size;
59 		kbuf.buf_align = phdr->p_align;
60 		kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
61 		kbuf.memsz = phdr->p_memsz;
62 		kbuf.top_down = false;
63 		ret = kexec_add_buffer(&kbuf);
64 		if (ret)
65 			break;
66 	}
67 
68 	return ret;
69 }
70 
71 /*
72  * Go through the available phsyical memory regions and find one that hold
73  * an image of the specified size.
74  */
elf_find_pbase(struct kimage * image,unsigned long kernel_len,struct elfhdr * ehdr,struct kexec_elf_info * elf_info,unsigned long * old_pbase,unsigned long * new_pbase)75 static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
76 			  struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
77 			  unsigned long *old_pbase, unsigned long *new_pbase)
78 {
79 	int i;
80 	int ret;
81 	struct kexec_buf kbuf;
82 	const struct elf_phdr *phdr;
83 	unsigned long lowest_paddr = ULONG_MAX;
84 	unsigned long lowest_vaddr = ULONG_MAX;
85 
86 	for (i = 0; i < ehdr->e_phnum; i++) {
87 		phdr = &elf_info->proghdrs[i];
88 		if (phdr->p_type != PT_LOAD)
89 			continue;
90 
91 		if (lowest_paddr > phdr->p_paddr)
92 			lowest_paddr = phdr->p_paddr;
93 
94 		if (lowest_vaddr > phdr->p_vaddr)
95 			lowest_vaddr = phdr->p_vaddr;
96 	}
97 
98 	kbuf.image = image;
99 	kbuf.buf_min = lowest_paddr;
100 	kbuf.buf_max = ULONG_MAX;
101 
102 	/*
103 	 * Current riscv boot protocol requires 2MB alignment for
104 	 * RV64 and 4MB alignment for RV32
105 	 *
106 	 */
107 	kbuf.buf_align = PMD_SIZE;
108 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
109 	kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
110 	kbuf.top_down = false;
111 	ret = arch_kexec_locate_mem_hole(&kbuf);
112 	if (!ret) {
113 		*old_pbase = lowest_paddr;
114 		*new_pbase = kbuf.mem;
115 		image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
116 	}
117 	return ret;
118 }
119 
/*
 * Resource-walk callback that counts the resources visited.
 *
 * @res: resource being visited (not inspected)
 * @arg: pointer to the unsigned int counter to bump
 *
 * Return: always 0.
 */
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *count = arg;

	*count += 1;

	return 0;
}
127 
prepare_elf64_ram_headers_callback(struct resource * res,void * arg)128 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
129 {
130 	struct crash_mem *cmem = arg;
131 
132 	cmem->ranges[cmem->nr_ranges].start = res->start;
133 	cmem->ranges[cmem->nr_ranges].end = res->end;
134 	cmem->nr_ranges++;
135 
136 	return 0;
137 }
138 
prepare_elf_headers(void ** addr,unsigned long * sz)139 static int prepare_elf_headers(void **addr, unsigned long *sz)
140 {
141 	struct crash_mem *cmem;
142 	unsigned int nr_ranges;
143 	int ret;
144 
145 	nr_ranges = 1; /* For exclusion of crashkernel region */
146 	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
147 
148 	cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
149 	if (!cmem)
150 		return -ENOMEM;
151 
152 	cmem->max_nr_ranges = nr_ranges;
153 	cmem->nr_ranges = 0;
154 	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
155 	if (ret)
156 		goto out;
157 
158 	/* Exclude crashkernel region */
159 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
160 	if (!ret)
161 		ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
162 
163 out:
164 	kfree(cmem);
165 	return ret;
166 }
167 
setup_kdump_cmdline(struct kimage * image,char * cmdline,unsigned long cmdline_len)168 static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
169 				 unsigned long cmdline_len)
170 {
171 	int elfcorehdr_strlen;
172 	char *cmdline_ptr;
173 
174 	cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
175 	if (!cmdline_ptr)
176 		return NULL;
177 
178 	elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
179 		image->elf_load_addr);
180 
181 	if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
182 		pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
183 		kfree(cmdline_ptr);
184 		return NULL;
185 	}
186 
187 	memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
188 	/* Ensure it's nul terminated */
189 	cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
190 	return cmdline_ptr;
191 }
192 
elf_kexec_load(struct kimage * image,char * kernel_buf,unsigned long kernel_len,char * initrd,unsigned long initrd_len,char * cmdline,unsigned long cmdline_len)193 static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
194 			    unsigned long kernel_len, char *initrd,
195 			    unsigned long initrd_len, char *cmdline,
196 			    unsigned long cmdline_len)
197 {
198 	int ret;
199 	unsigned long old_kernel_pbase = ULONG_MAX;
200 	unsigned long new_kernel_pbase = 0UL;
201 	unsigned long initrd_pbase = 0UL;
202 	unsigned long headers_sz;
203 	unsigned long kernel_start;
204 	void *fdt, *headers;
205 	struct elfhdr ehdr;
206 	struct kexec_buf kbuf;
207 	struct kexec_elf_info elf_info;
208 	char *modified_cmdline = NULL;
209 
210 	ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
211 	if (ret)
212 		return ERR_PTR(ret);
213 
214 	ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
215 			     &old_kernel_pbase, &new_kernel_pbase);
216 	if (ret)
217 		goto out;
218 	kernel_start = image->start;
219 	pr_notice("The entry point of kernel at 0x%lx\n", image->start);
220 
221 	/* Add the kernel binary to the image */
222 	ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
223 				   old_kernel_pbase, new_kernel_pbase);
224 	if (ret)
225 		goto out;
226 
227 	kbuf.image = image;
228 	kbuf.buf_min = new_kernel_pbase + kernel_len;
229 	kbuf.buf_max = ULONG_MAX;
230 
231 	/* Add elfcorehdr */
232 	if (image->type == KEXEC_TYPE_CRASH) {
233 		ret = prepare_elf_headers(&headers, &headers_sz);
234 		if (ret) {
235 			pr_err("Preparing elf core header failed\n");
236 			goto out;
237 		}
238 
239 		kbuf.buffer = headers;
240 		kbuf.bufsz = headers_sz;
241 		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
242 		kbuf.memsz = headers_sz;
243 		kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
244 		kbuf.top_down = true;
245 
246 		ret = kexec_add_buffer(&kbuf);
247 		if (ret) {
248 			vfree(headers);
249 			goto out;
250 		}
251 		image->elf_headers = headers;
252 		image->elf_load_addr = kbuf.mem;
253 		image->elf_headers_sz = headers_sz;
254 
255 		pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
256 			 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
257 
258 		/* Setup cmdline for kdump kernel case */
259 		modified_cmdline = setup_kdump_cmdline(image, cmdline,
260 						       cmdline_len);
261 		if (!modified_cmdline) {
262 			pr_err("Setting up cmdline for kdump kernel failed\n");
263 			ret = -EINVAL;
264 			goto out;
265 		}
266 		cmdline = modified_cmdline;
267 	}
268 
269 #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
270 	/* Add purgatory to the image */
271 	kbuf.top_down = true;
272 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
273 	ret = kexec_load_purgatory(image, &kbuf);
274 	if (ret) {
275 		pr_err("Error loading purgatory ret=%d\n", ret);
276 		goto out;
277 	}
278 	ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
279 					     &kernel_start,
280 					     sizeof(kernel_start), 0);
281 	if (ret)
282 		pr_err("Error update purgatory ret=%d\n", ret);
283 #endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
284 
285 	/* Add the initrd to the image */
286 	if (initrd != NULL) {
287 		kbuf.buffer = initrd;
288 		kbuf.bufsz = kbuf.memsz = initrd_len;
289 		kbuf.buf_align = PAGE_SIZE;
290 		kbuf.top_down = true;
291 		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
292 		ret = kexec_add_buffer(&kbuf);
293 		if (ret)
294 			goto out;
295 		initrd_pbase = kbuf.mem;
296 		pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase);
297 	}
298 
299 	/* Add the DTB to the image */
300 	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
301 					   initrd_len, cmdline, 0);
302 	if (!fdt) {
303 		pr_err("Error setting up the new device tree.\n");
304 		ret = -EINVAL;
305 		goto out;
306 	}
307 
308 	fdt_pack(fdt);
309 	kbuf.buffer = fdt;
310 	kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
311 	kbuf.buf_align = PAGE_SIZE;
312 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
313 	kbuf.top_down = true;
314 	ret = kexec_add_buffer(&kbuf);
315 	if (ret) {
316 		pr_err("Error add DTB kbuf ret=%d\n", ret);
317 		goto out_free_fdt;
318 	}
319 	/* Cache the fdt buffer address for memory cleanup */
320 	image->arch.fdt = fdt;
321 	pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem);
322 	goto out;
323 
324 out_free_fdt:
325 	kvfree(fdt);
326 out:
327 	kfree(modified_cmdline);
328 	kexec_free_elf_info(&elf_info);
329 	return ret ? ERR_PTR(ret) : NULL;
330 }
331 
/* Extract the n-bit field of x that starts at bit s. */
#define RV_X(x, s, n)  (((x) >> (s)) & ((1 << (n)) - 1))
#define RISCV_IMM_BITS 12
#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
/*
 * Split a constant into a high part (a multiple of RISCV_IMM_REACH, rounded
 * to nearest) and the remaining low part, so that high + low == x.
 */
#define RISCV_CONST_HIGH_PART(x) \
	(((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))

/*
 * ENCODE_<type>_IMM(x): scatter the bits of x into the bit positions the
 * immediate occupies in an instruction of the named format.
 */
#define ENCODE_ITYPE_IMM(x) \
	(RV_X(x, 0, 12) << 20)
#define ENCODE_BTYPE_IMM(x) \
	((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
	(RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
#define ENCODE_UTYPE_IMM(x) \
	(RV_X(x, 12, 20) << 12)
#define ENCODE_JTYPE_IMM(x) \
	((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
	(RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
#define ENCODE_CBTYPE_IMM(x) \
	((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
	(RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
#define ENCODE_CJTYPE_IMM(x) \
	((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
	(RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
	(RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
/*
 * 64-bit encodings: a U-type immediate in the low 32 bits combined with an
 * I-type immediate in the high 32 bits. UJTYPE first splits x into its
 * high/low constant parts; UITYPE feeds the same x to both encoders.
 */
#define ENCODE_UJTYPE_IMM(x) \
	(ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
	(ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
#define ENCODE_UITYPE_IMM(x) \
	(ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))

/*
 * Clear the immediate field of x for the given format: the mask is the
 * complement of what ENCODE_<type>_IMM produces for an all-ones input.
 */
#define CLEAN_IMM(type, x) \
	((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
364 
/*
 * Apply the RELA relocations in @relsec to @section of the purgatory.
 *
 * @pi:      purgatory info; supplies the ELF header, the section headers
 *           with their final addresses (pi->sechdrs) and the buffer that
 *           gets patched (pi->purgatory_buf)
 * @section: section the relocations apply to
 * @relsec:  RELA section holding the relocation entries
 * @symtab:  symbol table section referenced by the relocations
 *
 * Return: 0 on success, -ENOEXEC for a symbol with an invalid section index
 * or an unsupported relocation type.
 */
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
				     Elf_Shdr *section,
				     const Elf_Shdr *relsec,
				     const Elf_Shdr *symtab)
{
	const char *strtab, *name, *shstrtab;
	const Elf_Shdr *sechdrs;
	Elf64_Rela *relas;
	int i, r_type;

	/* String & section header string table */
	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
	strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
	shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;

	relas = (void *)pi->ehdr + relsec->sh_offset;

	for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
		const Elf_Sym *sym;	/* symbol to relocate */
		unsigned long addr;	/* final location after relocation */
		unsigned long val;	/* relocated symbol value */
		unsigned long sec_base;	/* base address of the symbol's section */
		void *loc;		/* tmp location to modify */

		sym = (void *)pi->ehdr + symtab->sh_offset;
		sym += ELF64_R_SYM(relas[i].r_info);

		/*
		 * Unnamed symbols are reported under their section's name.
		 * NOTE(review): sechdrs[] is indexed with st_shndx here before
		 * st_shndx is range-checked further down - confirm this cannot
		 * be reached with SHN_ABS or an out-of-range index.
		 */
		if (sym->st_name)
			name = strtab + sym->st_name;
		else
			name = shstrtab + sechdrs[sym->st_shndx].sh_name;

		/* Patch the copy in the purgatory buffer, not the ELF file. */
		loc = pi->purgatory_buf;
		loc += section->sh_offset;
		loc += relas[i].r_offset;

		if (sym->st_shndx == SHN_ABS)
			sec_base = 0;
		else if (sym->st_shndx >= pi->ehdr->e_shnum) {
			pr_err("Invalid section %d for symbol %s\n",
			       sym->st_shndx, name);
			return -ENOEXEC;
		} else
			sec_base = pi->sechdrs[sym->st_shndx].sh_addr;

		val = sym->st_value;
		val += sec_base;
		val += relas[i].r_addend;

		/* Run-time address of the location being patched. */
		addr = section->sh_addr + relas[i].r_offset;

		r_type = ELF64_R_TYPE(relas[i].r_info);

		/* PC-relative types encode the distance val - addr. */
		switch (r_type) {
		case R_RISCV_BRANCH:
			*(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
				 ENCODE_BTYPE_IMM(val - addr);
			break;
		case R_RISCV_JAL:
			*(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
				 ENCODE_JTYPE_IMM(val - addr);
			break;
		/*
		 * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
		 * sym is expected to be next to R_RISCV_PCREL_HI20
		 * in purgatory relsec. Handle it like R_RISCV_CALL
		 * sym, instead of searching the whole relsec.
		 */
		case R_RISCV_PCREL_HI20:
		case R_RISCV_CALL_PLT:
		case R_RISCV_CALL:
			/* Patches 64 bits at loc in a single access. */
			*(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
				 ENCODE_UJTYPE_IMM(val - addr);
			break;
		/*
		 * NOTE(review): the two RVC cases access 32 bits at loc even
		 * though the CB/CJ encodings above only touch bits 0-12 -
		 * confirm the wider access is intended.
		 */
		case R_RISCV_RVC_BRANCH:
			*(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
				 ENCODE_CBTYPE_IMM(val - addr);
			break;
		case R_RISCV_RVC_JUMP:
			*(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
				 ENCODE_CJTYPE_IMM(val - addr);
			break;
		/* Plain in-place add/subtract of the symbol value. */
		case R_RISCV_ADD16:
			*(u16 *)loc += val;
			break;
		case R_RISCV_SUB16:
			*(u16 *)loc -= val;
			break;
		case R_RISCV_ADD32:
			*(u32 *)loc += val;
			break;
		case R_RISCV_SUB32:
			*(u32 *)loc -= val;
			break;
		/* It has been applied by R_RISCV_PCREL_HI20 sym */
		case R_RISCV_PCREL_LO12_I:
		case R_RISCV_ALIGN:
		case R_RISCV_RELAX:
			break;
		default:
			pr_err("Unknown rela relocation: %d\n", r_type);
			return -ENOEXEC;
		}
	}
	return 0;
}
471 
472 const struct kexec_file_ops elf_kexec_ops = {
473 	.probe = kexec_elf_probe,
474 	.load  = elf_kexec_load,
475 };
476