xref: /openbmc/linux/fs/binfmt_elf.c (revision 6f6249a599e52e1a5f0b632f8edff733cfa76450)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <linux/rseq.h>
50 #include <asm/param.h>
51 #include <asm/page.h>
52 
53 #ifndef ELF_COMPAT
54 #define ELF_COMPAT 0
55 #endif
56 
57 #ifndef user_long_t
58 #define user_long_t long
59 #endif
60 #ifndef user_siginfo_t
61 #define user_siginfo_t siginfo_t
62 #endif
63 
64 /* That's for binfmt_elf_fdpic to deal with */
65 #ifndef elf_check_fdpic
66 #define elf_check_fdpic(ex) false
67 #endif
68 
69 static int load_elf_binary(struct linux_binprm *bprm);
70 
71 #ifdef CONFIG_USELIB
72 static int load_elf_library(struct file *);
73 #else
74 #define load_elf_library NULL
75 #endif
76 
77 /*
78  * If we don't support core dumping, then supply a NULL so we
79  * don't even try.
80  */
81 #ifdef CONFIG_ELF_CORE
82 static int elf_core_dump(struct coredump_params *cprm);
83 #else
84 #define elf_core_dump	NULL
85 #endif
86 
87 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
88 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
89 #else
90 #define ELF_MIN_ALIGN	PAGE_SIZE
91 #endif
92 
93 #ifndef ELF_CORE_EFLAGS
94 #define ELF_CORE_EFLAGS	0
95 #endif
96 
97 #define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
98 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
99 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
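/*
 * For example, assuming ELF_MIN_ALIGN == 0x1000: ELF_PAGESTART(0x400154)
 * is 0x400000, ELF_PAGEOFFSET(0x400154) is 0x154, and
 * ELF_PAGEALIGN(0x400154) is 0x401000.
 */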
100 
101 static struct linux_binfmt elf_format = {
102 	.module		= THIS_MODULE,
103 	.load_binary	= load_elf_binary,
104 	.load_shlib	= load_elf_library,
105 #ifdef CONFIG_COREDUMP
106 	.core_dump	= elf_core_dump,
107 	.min_coredump	= ELF_EXEC_PAGESIZE,
108 #endif
109 };
110 
111 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
112 
113 /*
114  * We need to explicitly zero any fractional pages after the data
115  * section (i.e. bss).  This would contain the junk from the file
116  * that should not be in memory.
117  */
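/*
 * For example, assuming ELF_MIN_ALIGN == 0x1000 and elf_bss == 0x601234,
 * this clears the 0xdcc bytes from 0x601234 up to the 0x602000 boundary.
 */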
118 static int padzero(unsigned long elf_bss)
119 {
120 	unsigned long nbyte;
121 
122 	nbyte = ELF_PAGEOFFSET(elf_bss);
123 	if (nbyte) {
124 		nbyte = ELF_MIN_ALIGN - nbyte;
125 		if (clear_user((void __user *) elf_bss, nbyte))
126 			return -EFAULT;
127 	}
128 	return 0;
129 }
130 
131 /* Let's use some macros to make this stack manipulation a little clearer */
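/*
 * On the usual downward-growing stack, STACK_ALLOC() simply moves sp down
 * by len; with CONFIG_STACK_GROWSUP it returns the old sp and advances sp
 * past the allocation.  STACK_ROUND() leaves the final stack pointer
 * aligned to a 16-byte boundary after reserving room for "items" entries.
 */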
132 #ifdef CONFIG_STACK_GROWSUP
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
134 #define STACK_ROUND(sp, items) \
135 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ \
137 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
138 	old_sp; })
139 #else
140 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
141 #define STACK_ROUND(sp, items) \
142 	(((unsigned long) (sp - items)) &~ 15UL)
143 #define STACK_ALLOC(sp, len) (sp -= len)
144 #endif
145 
146 #ifndef ELF_BASE_PLATFORM
147 /*
148  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
149  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
150  * will be copied to the user stack in the same manner as AT_PLATFORM.
151  */
152 #define ELF_BASE_PLATFORM NULL
153 #endif
154 
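/*
 * create_elf_tables() lays out the final userspace stack for the new image:
 * it copies the platform strings and the AT_RANDOM seed onto the stack,
 * builds the auxiliary vector in mm->saved_auxv, and then writes argc, the
 * argv/envp pointer arrays and the auxv onto the stack, pointing back at
 * the argument strings that exec has already copied in.
 */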
155 static int
156 create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
157 		unsigned long interp_load_addr,
158 		unsigned long e_entry, unsigned long phdr_addr)
159 {
160 	struct mm_struct *mm = current->mm;
161 	unsigned long p = bprm->p;
162 	int argc = bprm->argc;
163 	int envc = bprm->envc;
164 	elf_addr_t __user *sp;
165 	elf_addr_t __user *u_platform;
166 	elf_addr_t __user *u_base_platform;
167 	elf_addr_t __user *u_rand_bytes;
168 	const char *k_platform = ELF_PLATFORM;
169 	const char *k_base_platform = ELF_BASE_PLATFORM;
170 	unsigned char k_rand_bytes[16];
171 	int items;
172 	elf_addr_t *elf_info;
173 	elf_addr_t flags = 0;
174 	int ei_index;
175 	const struct cred *cred = current_cred();
176 	struct vm_area_struct *vma;
177 
178 	/*
179 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
180 	 * evictions by the processes running on the same package. One
181 	 * thing we can do is to shuffle the initial stack for them.
182 	 */
183 
184 	p = arch_align_stack(p);
185 
186 	/*
187 	 * If this architecture has a platform capability string, copy it
188 	 * to userspace.  In some cases (Sparc), this info is impossible
189 	 * for userspace to get any other way, in others (i386) it is
190 	 * merely difficult.
191 	 */
192 	u_platform = NULL;
193 	if (k_platform) {
194 		size_t len = strlen(k_platform) + 1;
195 
196 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197 		if (copy_to_user(u_platform, k_platform, len))
198 			return -EFAULT;
199 	}
200 
201 	/*
202 	 * If this architecture has a "base" platform capability
203 	 * string, copy it to userspace.
204 	 */
205 	u_base_platform = NULL;
206 	if (k_base_platform) {
207 		size_t len = strlen(k_base_platform) + 1;
208 
209 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
210 		if (copy_to_user(u_base_platform, k_base_platform, len))
211 			return -EFAULT;
212 	}
213 
214 	/*
215 	 * Generate 16 random bytes for userspace PRNG seeding.
216 	 */
217 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
218 	u_rand_bytes = (elf_addr_t __user *)
219 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
220 	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
221 		return -EFAULT;
222 
223 	/* Create the ELF interpreter info */
224 	elf_info = (elf_addr_t *)mm->saved_auxv;
225 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
226 #define NEW_AUX_ENT(id, val) \
227 	do { \
228 		*elf_info++ = id; \
229 		*elf_info++ = val; \
230 	} while (0)
231 
232 #ifdef ARCH_DLINFO
233 	/*
234 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
235 	 * AUXV.
236 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
237 	 * ARCH_DLINFO changes
238 	 */
239 	ARCH_DLINFO;
240 #endif
241 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
242 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
243 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
244 	NEW_AUX_ENT(AT_PHDR, phdr_addr);
245 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
246 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
247 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
248 	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
249 		flags |= AT_FLAGS_PRESERVE_ARGV0;
250 	NEW_AUX_ENT(AT_FLAGS, flags);
251 	NEW_AUX_ENT(AT_ENTRY, e_entry);
252 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
253 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
254 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
255 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
256 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
257 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
258 #ifdef ELF_HWCAP2
259 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
260 #endif
261 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
262 	if (k_platform) {
263 		NEW_AUX_ENT(AT_PLATFORM,
264 			    (elf_addr_t)(unsigned long)u_platform);
265 	}
266 	if (k_base_platform) {
267 		NEW_AUX_ENT(AT_BASE_PLATFORM,
268 			    (elf_addr_t)(unsigned long)u_base_platform);
269 	}
270 	if (bprm->have_execfd) {
271 		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
272 	}
273 #ifdef CONFIG_RSEQ
274 	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
275 	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
276 #endif
277 #undef NEW_AUX_ENT
278 	/* AT_NULL is zero; clear the rest too */
279 	memset(elf_info, 0, (char *)mm->saved_auxv +
280 			sizeof(mm->saved_auxv) - (char *)elf_info);
281 
282 	/* And advance past the AT_NULL entry.  */
283 	elf_info += 2;
284 
285 	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
286 	sp = STACK_ADD(p, ei_index);
287 
288 	items = (argc + 1) + (envc + 1) + 1;
289 	bprm->p = STACK_ROUND(sp, items);
290 
291 	/* Point sp at the lowest address on the stack */
292 #ifdef CONFIG_STACK_GROWSUP
293 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
294 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
295 #else
296 	sp = (elf_addr_t __user *)bprm->p;
297 #endif
298 
299 
300 	/*
301 	 * Grow the stack manually; some architectures have a limit on how
302 	 * far ahead a user-space access may be in order to grow the stack.
303 	 */
304 	if (mmap_write_lock_killable(mm))
305 		return -EINTR;
306 	vma = find_extend_vma_locked(mm, bprm->p);
307 	mmap_write_unlock(mm);
308 	if (!vma)
309 		return -EFAULT;
310 
311 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
312 	if (put_user(argc, sp++))
313 		return -EFAULT;
314 
315 	/* Populate list of argv pointers back to argv strings. */
316 	p = mm->arg_end = mm->arg_start;
317 	while (argc-- > 0) {
318 		size_t len;
319 		if (put_user((elf_addr_t)p, sp++))
320 			return -EFAULT;
321 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
322 		if (!len || len > MAX_ARG_STRLEN)
323 			return -EINVAL;
324 		p += len;
325 	}
326 	if (put_user(0, sp++))
327 		return -EFAULT;
328 	mm->arg_end = p;
329 
330 	/* Populate list of envp pointers back to envp strings. */
331 	mm->env_end = mm->env_start = p;
332 	while (envc-- > 0) {
333 		size_t len;
334 		if (put_user((elf_addr_t)p, sp++))
335 			return -EFAULT;
336 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
337 		if (!len || len > MAX_ARG_STRLEN)
338 			return -EINVAL;
339 		p += len;
340 	}
341 	if (put_user(0, sp++))
342 		return -EFAULT;
343 	mm->env_end = p;
344 
345 	/* Put the elf_info on the stack in the right place.  */
346 	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
347 		return -EFAULT;
348 	return 0;
349 }
350 
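/*
 * elf_map() wraps vm_mmap() for a single PT_LOAD segment: the requested
 * address and size are aligned to ELF_MIN_ALIGN and the file offset is
 * adjusted by the segment's in-page offset.  When total_size is non-zero
 * (the first mapping of an image), the whole image span is mapped first
 * and the tail beyond this segment is unmapped again, so later segments
 * cannot collide with a randomized placement.
 */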
351 static unsigned long elf_map(struct file *filep, unsigned long addr,
352 		const struct elf_phdr *eppnt, int prot, int type,
353 		unsigned long total_size)
354 {
355 	unsigned long map_addr;
356 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
357 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
358 	addr = ELF_PAGESTART(addr);
359 	size = ELF_PAGEALIGN(size);
360 
361 	/* mmap() will return -EINVAL if given a zero size, but a
362 	 * segment with zero filesize is perfectly valid */
363 	if (!size)
364 		return addr;
365 
366 	/*
367 	* total_size is the size of the ELF (interpreter) image.
368 	* The _first_ mmap needs to know the full size, otherwise
369 	* randomization might put this image into an overlapping
370 	* position with the ELF binary image. (since size < total_size)
371 	* So we first map the 'big' image - and unmap the remainder at
372 	* the end. (which unmap is needed for ELF images with holes.)
373 	*/
374 	if (total_size) {
375 		total_size = ELF_PAGEALIGN(total_size);
376 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
377 		if (!BAD_ADDR(map_addr))
378 			vm_munmap(map_addr+size, total_size-size);
379 	} else
380 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
381 
382 	if ((type & MAP_FIXED_NOREPLACE) &&
383 	    PTR_ERR((void *)map_addr) == -EEXIST)
384 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
385 			task_pid_nr(current), current->comm, (void *)addr);
386 
387 	return(map_addr);
388 }
389 
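/*
 * elf_load() maps the file-backed part of a PT_LOAD segment via elf_map()
 * and then handles any p_memsz > p_filesz tail: the partial last page is
 * zeroed with padzero() and the remaining whole pages are set up
 * anonymously with vm_brk_flags(), keeping VM_EXEC when the segment is
 * executable.
 */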
390 static unsigned long elf_load(struct file *filep, unsigned long addr,
391 		const struct elf_phdr *eppnt, int prot, int type,
392 		unsigned long total_size)
393 {
394 	unsigned long zero_start, zero_end;
395 	unsigned long map_addr;
396 
397 	if (eppnt->p_filesz) {
398 		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
399 		if (BAD_ADDR(map_addr))
400 			return map_addr;
401 		if (eppnt->p_memsz > eppnt->p_filesz) {
402 			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
403 				eppnt->p_filesz;
404 			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
405 				eppnt->p_memsz;
406 
407 			/* Zero the end of the last mapped page */
408 			padzero(zero_start);
409 		}
410 	} else {
411 		map_addr = zero_start = ELF_PAGESTART(addr);
412 		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
413 			eppnt->p_memsz;
414 	}
415 	if (eppnt->p_memsz > eppnt->p_filesz) {
416 		/*
417 		 * Map the last of the segment.
418 		 * If the header is requesting these pages to be
419 		 * executable, honour that (ppc32 needs this).
420 		 */
421 		int error;
422 
423 		zero_start = ELF_PAGEALIGN(zero_start);
424 		zero_end = ELF_PAGEALIGN(zero_end);
425 
426 		error = vm_brk_flags(zero_start, zero_end - zero_start,
427 				     prot & PROT_EXEC ? VM_EXEC : 0);
428 		if (error)
429 			map_addr = error;
430 	}
431 	return map_addr;
432 }
433 
434 
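/*
 * total_mapping_size() returns the address-space footprint of all PT_LOAD
 * segments: from the page start of the lowest p_vaddr to the end of the
 * highest p_vaddr + p_memsz, or 0 if there are no PT_LOAD headers.
 */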
435 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
436 {
437 	elf_addr_t min_addr = -1;
438 	elf_addr_t max_addr = 0;
439 	bool pt_load = false;
440 	int i;
441 
442 	for (i = 0; i < nr; i++) {
443 		if (phdr[i].p_type == PT_LOAD) {
444 			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
445 			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
446 			pt_load = true;
447 		}
448 	}
449 	return pt_load ? (max_addr - min_addr) : 0;
450 }
451 
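/* Read from an explicit file offset, treating a short read as an error. */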
452 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
453 {
454 	ssize_t rv;
455 
456 	rv = kernel_read(file, buf, len, &pos);
457 	if (unlikely(rv != len)) {
458 		return (rv < 0) ? rv : -EIO;
459 	}
460 	return 0;
461 }
462 
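/*
 * maximum_alignment() returns the largest power-of-two p_align requested
 * by any PT_LOAD header, page-aligned; other alignment values are skipped
 * as invalid.
 */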
463 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
464 {
465 	unsigned long alignment = 0;
466 	int i;
467 
468 	for (i = 0; i < nr; i++) {
469 		if (cmds[i].p_type == PT_LOAD) {
470 			unsigned long p_align = cmds[i].p_align;
471 
472 			/* skip non-power of two alignments as invalid */
473 			if (!is_power_of_2(p_align))
474 				continue;
475 			alignment = max(alignment, p_align);
476 		}
477 	}
478 
479 	/* ensure we align to at least one page */
480 	return ELF_PAGEALIGN(alignment);
481 }
482 
483 /**
484  * load_elf_phdrs() - load ELF program headers
485  * @elf_ex:   ELF header of the binary whose program headers should be loaded
486  * @elf_file: the opened ELF binary file
487  *
488  * Loads ELF program headers from the binary file elf_file, which has the ELF
489  * header pointed to by elf_ex, into a newly allocated array. The caller is
490  * responsible for freeing the allocated data. Returns NULL upon failure.
491  */
492 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
493 				       struct file *elf_file)
494 {
495 	struct elf_phdr *elf_phdata = NULL;
496 	int retval = -1;
497 	unsigned int size;
498 
499 	/*
500 	 * If the size of this structure has changed, then punt, since
501 	 * we will be doing the wrong thing.
502 	 */
503 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
504 		goto out;
505 
506 	/* Sanity check the number of program headers... */
507 	/* ...and their total size. */
508 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
509 	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
510 		goto out;
511 
512 	elf_phdata = kmalloc(size, GFP_KERNEL);
513 	if (!elf_phdata)
514 		goto out;
515 
516 	/* Read in the program headers */
517 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
518 
519 out:
520 	if (retval) {
521 		kfree(elf_phdata);
522 		elf_phdata = NULL;
523 	}
524 	return elf_phdata;
525 }
526 
527 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
528 
529 /**
530  * struct arch_elf_state - arch-specific ELF loading state
531  *
532  * This structure is used to preserve architecture specific data during
533  * the loading of an ELF file, throughout the checking of architecture
534  * specific ELF headers & through to the point where the ELF load is
535  * known to be proceeding (ie. SET_PERSONALITY).
536  *
537  * This implementation is a dummy for architectures which require no
538  * specific state.
539  */
540 struct arch_elf_state {
541 };
542 
543 #define INIT_ARCH_ELF_STATE {}
544 
545 /**
546  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
547  * @ehdr:	The main ELF header
548  * @phdr:	The program header to check
549  * @elf:	The open ELF file
550  * @is_interp:	True if the phdr is from the interpreter of the ELF being
551  *		loaded, else false.
552  * @state:	Architecture-specific state preserved throughout the process
553  *		of loading the ELF.
554  *
555  * Inspects the program header phdr to validate its correctness and/or
556  * suitability for the system. Called once per ELF program header in the
557  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
558  * interpreter.
559  *
560  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
561  *         with that return code.
562  */
563 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
564 				   struct elf_phdr *phdr,
565 				   struct file *elf, bool is_interp,
566 				   struct arch_elf_state *state)
567 {
568 	/* Dummy implementation, always proceed */
569 	return 0;
570 }
571 
572 /**
573  * arch_check_elf() - check an ELF executable
574  * @ehdr:	The main ELF header
575  * @has_interp:	True if the ELF has an interpreter, else false.
576  * @interp_ehdr: The interpreter's ELF header
577  * @state:	Architecture-specific state preserved throughout the process
578  *		of loading the ELF.
579  *
580  * Provides a final opportunity for architecture code to reject the loading
581  * of the ELF & cause an exec syscall to return an error. This is called after
582  * all program headers to be checked by arch_elf_pt_proc have been.
583  *
584  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
585  *         with that return code.
586  */
587 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
588 				 struct elfhdr *interp_ehdr,
589 				 struct arch_elf_state *state)
590 {
591 	/* Dummy implementation, always proceed */
592 	return 0;
593 }
594 
595 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
596 
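/*
 * Convert the PF_R/PF_W/PF_X segment flags into PROT_* bits and give the
 * architecture a chance to adjust them via arch_elf_adjust_prot().
 */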
597 static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
598 			    bool has_interp, bool is_interp)
599 {
600 	int prot = 0;
601 
602 	if (p_flags & PF_R)
603 		prot |= PROT_READ;
604 	if (p_flags & PF_W)
605 		prot |= PROT_WRITE;
606 	if (p_flags & PF_X)
607 		prot |= PROT_EXEC;
608 
609 	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
610 }
611 
612 /* This is much more generalized than the library routine read function,
613    so we keep this separate.  Technically the library read function
614    is only provided so that we can read a.out libraries that have
615    an ELF header */
616 
617 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
618 		struct file *interpreter,
619 		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
620 		struct arch_elf_state *arch_state)
621 {
622 	struct elf_phdr *eppnt;
623 	unsigned long load_addr = 0;
624 	int load_addr_set = 0;
625 	unsigned long last_bss = 0, elf_bss = 0;
626 	int bss_prot = 0;
627 	unsigned long error = ~0UL;
628 	unsigned long total_size;
629 	int i;
630 
631 	/* First of all, some simple consistency checks */
632 	if (interp_elf_ex->e_type != ET_EXEC &&
633 	    interp_elf_ex->e_type != ET_DYN)
634 		goto out;
635 	if (!elf_check_arch(interp_elf_ex) ||
636 	    elf_check_fdpic(interp_elf_ex))
637 		goto out;
638 	if (!interpreter->f_op->mmap)
639 		goto out;
640 
641 	total_size = total_mapping_size(interp_elf_phdata,
642 					interp_elf_ex->e_phnum);
643 	if (!total_size) {
644 		error = -EINVAL;
645 		goto out;
646 	}
647 
648 	eppnt = interp_elf_phdata;
649 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
650 		if (eppnt->p_type == PT_LOAD) {
651 			int elf_type = MAP_PRIVATE;
652 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
653 						 true, true);
654 			unsigned long vaddr = 0;
655 			unsigned long k, map_addr;
656 
657 			vaddr = eppnt->p_vaddr;
658 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
659 				elf_type |= MAP_FIXED;
660 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
661 				load_addr = -vaddr;
662 
663 			map_addr = elf_map(interpreter, load_addr + vaddr,
664 					eppnt, elf_prot, elf_type, total_size);
665 			total_size = 0;
666 			error = map_addr;
667 			if (BAD_ADDR(map_addr))
668 				goto out;
669 
670 			if (!load_addr_set &&
671 			    interp_elf_ex->e_type == ET_DYN) {
672 				load_addr = map_addr - ELF_PAGESTART(vaddr);
673 				load_addr_set = 1;
674 			}
675 
676 			/*
677 			 * Check to see if the section's size will overflow the
678 			 * allowed task size. Note that p_filesz must always be
679 			 * <= p_memsz so it's only necessary to check p_memsz.
680 			 */
681 			k = load_addr + eppnt->p_vaddr;
682 			if (BAD_ADDR(k) ||
683 			    eppnt->p_filesz > eppnt->p_memsz ||
684 			    eppnt->p_memsz > TASK_SIZE ||
685 			    TASK_SIZE - eppnt->p_memsz < k) {
686 				error = -ENOMEM;
687 				goto out;
688 			}
689 
690 			/*
691 			 * Find the end of the file mapping for this phdr, and
692 			 * keep track of the largest address we see for this.
693 			 */
694 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
695 			if (k > elf_bss)
696 				elf_bss = k;
697 
698 			/*
699 			 * Do the same thing for the memory mapping - between
700 			 * elf_bss and last_bss is the bss section.
701 			 */
702 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
703 			if (k > last_bss) {
704 				last_bss = k;
705 				bss_prot = elf_prot;
706 			}
707 		}
708 	}
709 
710 	/*
711 	 * Now fill out the bss section: first pad the last page from
712 	 * the file up to the page boundary, and zero it from elf_bss
713 	 * up to the end of the page.
714 	 */
715 	if (padzero(elf_bss)) {
716 		error = -EFAULT;
717 		goto out;
718 	}
719 	/*
720 	 * Next, align both the file and mem bss up to the page size,
721 	 * since this is where elf_bss was just zeroed up to, and where
722 	 * last_bss will end after the vm_brk_flags() below.
723 	 */
724 	elf_bss = ELF_PAGEALIGN(elf_bss);
725 	last_bss = ELF_PAGEALIGN(last_bss);
726 	/* Finally, if there is still more bss to allocate, do it. */
727 	if (last_bss > elf_bss) {
728 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
729 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
730 		if (error)
731 			goto out;
732 	}
733 
734 	error = load_addr;
735 out:
736 	return error;
737 }
738 
739 /*
740  * These are the functions used to load ELF style executables and shared
741  * libraries.  There is no binary dependent code anywhere else.
742  */
743 
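/*
 * Parse a single entry of a PT_GNU_PROPERTY note at *off within data,
 * checking alignment, size and ascending pr_type order, and hand it to
 * arch_parse_elf_property().  *off is advanced past the entry; -ENOENT
 * signals the end of the data.
 */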
744 static int parse_elf_property(const char *data, size_t *off, size_t datasz,
745 			      struct arch_elf_state *arch,
746 			      bool have_prev_type, u32 *prev_type)
747 {
748 	size_t o, step;
749 	const struct gnu_property *pr;
750 	int ret;
751 
752 	if (*off == datasz)
753 		return -ENOENT;
754 
755 	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
756 		return -EIO;
757 	o = *off;
758 	datasz -= *off;
759 
760 	if (datasz < sizeof(*pr))
761 		return -ENOEXEC;
762 	pr = (const struct gnu_property *)(data + o);
763 	o += sizeof(*pr);
764 	datasz -= sizeof(*pr);
765 
766 	if (pr->pr_datasz > datasz)
767 		return -ENOEXEC;
768 
769 	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
770 	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
771 	if (step > datasz)
772 		return -ENOEXEC;
773 
774 	/* Properties are supposed to be unique and sorted on pr_type: */
775 	if (have_prev_type && pr->pr_type <= *prev_type)
776 		return -ENOEXEC;
777 	*prev_type = pr->pr_type;
778 
779 	ret = arch_parse_elf_property(pr->pr_type, data + o,
780 				      pr->pr_datasz, ELF_COMPAT, arch);
781 	if (ret)
782 		return ret;
783 
784 	*off = o + step;
785 	return 0;
786 }
787 
788 #define NOTE_DATA_SZ SZ_1K
789 #define GNU_PROPERTY_TYPE_0_NAME "GNU"
790 #define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
791 
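/*
 * Read the PT_GNU_PROPERTY note into a bounded on-stack buffer, validate
 * the GNU note header, and feed each property to parse_elf_property().
 */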
792 static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
793 				struct arch_elf_state *arch)
794 {
795 	union {
796 		struct elf_note nhdr;
797 		char data[NOTE_DATA_SZ];
798 	} note;
799 	loff_t pos;
800 	ssize_t n;
801 	size_t off, datasz;
802 	int ret;
803 	bool have_prev_type;
804 	u32 prev_type;
805 
806 	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
807 		return 0;
808 
809 	/* load_elf_binary() shouldn't call us unless this is true... */
810 	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
811 		return -ENOEXEC;
812 
813 	/* If the properties are crazy large, that's too bad (for now): */
814 	if (phdr->p_filesz > sizeof(note))
815 		return -ENOEXEC;
816 
817 	pos = phdr->p_offset;
818 	n = kernel_read(f, &note, phdr->p_filesz, &pos);
819 
820 	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
821 	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
822 		return -EIO;
823 
824 	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
825 	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
826 	    strncmp(note.data + sizeof(note.nhdr),
827 		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
828 		return -ENOEXEC;
829 
830 	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
831 		       ELF_GNU_PROPERTY_ALIGN);
832 	if (off > n)
833 		return -ENOEXEC;
834 
835 	if (note.nhdr.n_descsz > n - off)
836 		return -ENOEXEC;
837 	datasz = off + note.nhdr.n_descsz;
838 
839 	have_prev_type = false;
840 	do {
841 		ret = parse_elf_property(note.data, &off, datasz, arch,
842 					 have_prev_type, &prev_type);
843 		have_prev_type = true;
844 	} while (!ret);
845 
846 	return ret == -ENOENT ? 0 : ret;
847 }
848 
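/*
 * load_elf_binary() is the binfmt handler for ELF executables: it validates
 * the ELF header, reads the program headers, opens any PT_INTERP
 * interpreter, maps every PT_LOAD segment (choosing a randomized load_bias
 * for ET_DYN), loads the interpreter if present, sets up the stack and
 * auxiliary vector via create_elf_tables(), and finally starts the new
 * thread at the ELF (or interpreter) entry point.
 */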
849 static int load_elf_binary(struct linux_binprm *bprm)
850 {
851 	struct file *interpreter = NULL; /* to shut gcc up */
852 	unsigned long load_bias = 0, phdr_addr = 0;
853 	int first_pt_load = 1;
854 	unsigned long error;
855 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
856 	struct elf_phdr *elf_property_phdata = NULL;
857 	unsigned long elf_brk;
858 	bool brk_moved = false;
859 	int retval, i;
860 	unsigned long elf_entry;
861 	unsigned long e_entry;
862 	unsigned long interp_load_addr = 0;
863 	unsigned long start_code, end_code, start_data, end_data;
864 	unsigned long reloc_func_desc __maybe_unused = 0;
865 	int executable_stack = EXSTACK_DEFAULT;
866 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
867 	struct elfhdr *interp_elf_ex = NULL;
868 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
869 	struct mm_struct *mm;
870 	struct pt_regs *regs;
871 
872 	retval = -ENOEXEC;
873 	/* First of all, some simple consistency checks */
874 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
875 		goto out;
876 
877 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
878 		goto out;
879 	if (!elf_check_arch(elf_ex))
880 		goto out;
881 	if (elf_check_fdpic(elf_ex))
882 		goto out;
883 	if (!bprm->file->f_op->mmap)
884 		goto out;
885 
886 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
887 	if (!elf_phdata)
888 		goto out;
889 
890 	elf_ppnt = elf_phdata;
891 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
892 		char *elf_interpreter;
893 
894 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
895 			elf_property_phdata = elf_ppnt;
896 			continue;
897 		}
898 
899 		if (elf_ppnt->p_type != PT_INTERP)
900 			continue;
901 
902 		/*
903 		 * This is the program interpreter used for shared libraries -
904 		 * for now assume that this is an a.out format binary.
905 		 */
906 		retval = -ENOEXEC;
907 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
908 			goto out_free_ph;
909 
910 		retval = -ENOMEM;
911 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
912 		if (!elf_interpreter)
913 			goto out_free_ph;
914 
915 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
916 				  elf_ppnt->p_offset);
917 		if (retval < 0)
918 			goto out_free_interp;
919 		/* make sure path is NULL terminated */
920 		retval = -ENOEXEC;
921 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
922 			goto out_free_interp;
923 
924 		interpreter = open_exec(elf_interpreter);
925 		kfree(elf_interpreter);
926 		retval = PTR_ERR(interpreter);
927 		if (IS_ERR(interpreter))
928 			goto out_free_ph;
929 
930 		/*
931 		 * If the binary is not readable then enforce mm->dumpable = 0
932 		 * regardless of the interpreter's permissions.
933 		 */
934 		would_dump(bprm, interpreter);
935 
936 		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
937 		if (!interp_elf_ex) {
938 			retval = -ENOMEM;
939 			goto out_free_file;
940 		}
941 
942 		/* Get the exec headers */
943 		retval = elf_read(interpreter, interp_elf_ex,
944 				  sizeof(*interp_elf_ex), 0);
945 		if (retval < 0)
946 			goto out_free_dentry;
947 
948 		break;
949 
950 out_free_interp:
951 		kfree(elf_interpreter);
952 		goto out_free_ph;
953 	}
954 
955 	elf_ppnt = elf_phdata;
956 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
957 		switch (elf_ppnt->p_type) {
958 		case PT_GNU_STACK:
959 			if (elf_ppnt->p_flags & PF_X)
960 				executable_stack = EXSTACK_ENABLE_X;
961 			else
962 				executable_stack = EXSTACK_DISABLE_X;
963 			break;
964 
965 		case PT_LOPROC ... PT_HIPROC:
966 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
967 						  bprm->file, false,
968 						  &arch_state);
969 			if (retval)
970 				goto out_free_dentry;
971 			break;
972 		}
973 
974 	/* Some simple consistency checks for the interpreter */
975 	if (interpreter) {
976 		retval = -ELIBBAD;
977 		/* Not an ELF interpreter */
978 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
979 			goto out_free_dentry;
980 		/* Verify the interpreter has a valid arch */
981 		if (!elf_check_arch(interp_elf_ex) ||
982 		    elf_check_fdpic(interp_elf_ex))
983 			goto out_free_dentry;
984 
985 		/* Load the interpreter program headers */
986 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
987 						   interpreter);
988 		if (!interp_elf_phdata)
989 			goto out_free_dentry;
990 
991 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
992 		elf_property_phdata = NULL;
993 		elf_ppnt = interp_elf_phdata;
994 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
995 			switch (elf_ppnt->p_type) {
996 			case PT_GNU_PROPERTY:
997 				elf_property_phdata = elf_ppnt;
998 				break;
999 
1000 			case PT_LOPROC ... PT_HIPROC:
1001 				retval = arch_elf_pt_proc(interp_elf_ex,
1002 							  elf_ppnt, interpreter,
1003 							  true, &arch_state);
1004 				if (retval)
1005 					goto out_free_dentry;
1006 				break;
1007 			}
1008 	}
1009 
1010 	retval = parse_elf_properties(interpreter ?: bprm->file,
1011 				      elf_property_phdata, &arch_state);
1012 	if (retval)
1013 		goto out_free_dentry;
1014 
1015 	/*
1016 	 * Allow arch code to reject the ELF at this point, whilst it's
1017 	 * still possible to return an error to the code that invoked
1018 	 * the exec syscall.
1019 	 */
1020 	retval = arch_check_elf(elf_ex,
1021 				!!interpreter, interp_elf_ex,
1022 				&arch_state);
1023 	if (retval)
1024 		goto out_free_dentry;
1025 
1026 	/* Flush all traces of the currently running executable */
1027 	retval = begin_new_exec(bprm);
1028 	if (retval)
1029 		goto out_free_dentry;
1030 
1031 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1032 	   may depend on the personality.  */
1033 	SET_PERSONALITY2(*elf_ex, &arch_state);
1034 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1035 		current->personality |= READ_IMPLIES_EXEC;
1036 
1037 	const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
1038 	if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
1039 		current->flags |= PF_RANDOMIZE;
1040 
1041 	setup_new_exec(bprm);
1042 
1043 	/* Do this so that we can load the interpreter, if need be.  We will
1044 	   change some of these later */
1045 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1046 				 executable_stack);
1047 	if (retval < 0)
1048 		goto out_free_dentry;
1049 
1050 	elf_brk = 0;
1051 
1052 	start_code = ~0UL;
1053 	end_code = 0;
1054 	start_data = 0;
1055 	end_data = 0;
1056 
1057 	/* Now we do a little grungy work by mmapping the ELF image into
1058 	   the correct location in memory. */
1059 	for(i = 0, elf_ppnt = elf_phdata;
1060 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1061 		int elf_prot, elf_flags;
1062 		unsigned long k, vaddr;
1063 		unsigned long total_size = 0;
1064 		unsigned long alignment;
1065 
1066 		if (elf_ppnt->p_type != PT_LOAD)
1067 			continue;
1068 
1069 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1070 				     !!interpreter, false);
1071 
1072 		elf_flags = MAP_PRIVATE;
1073 
1074 		vaddr = elf_ppnt->p_vaddr;
1075 		/*
1076 		 * The first time through the loop, first_pt_load is true:
1077 		 * layout will be calculated. Once set, use MAP_FIXED since
1078 		 * we know we've already safely mapped the entire region with
1079 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1080 		 */
1081 		if (!first_pt_load) {
1082 			elf_flags |= MAP_FIXED;
1083 		} else if (elf_ex->e_type == ET_EXEC) {
1084 			/*
1085 			 * This logic is run once for the first LOAD Program
1086 			 * Header for ET_EXEC binaries. No special handling
1087 			 * is needed.
1088 			 */
1089 			elf_flags |= MAP_FIXED_NOREPLACE;
1090 		} else if (elf_ex->e_type == ET_DYN) {
1091 			/*
1092 			 * This logic is run once for the first LOAD Program
1093 			 * Header for ET_DYN binaries to calculate the
1094 			 * randomization (load_bias) for all the LOAD
1095 			 * Program Headers.
1096 			 */
1097 
1098 			/*
1099 			 * Calculate the entire size of the ELF mapping
1100 			 * (total_size), used for the initial mapping,
1101 			 * due to load_addr_set which is set to true later
1102 			 * once the initial mapping is performed.
1103 			 *
1104 			 * Note that this is only sensible when the LOAD
1105 			 * segments are contiguous (or overlapping). If
1106 			 * used for LOADs that are far apart, this would
1107 			 * cause the holes between LOADs to be mapped,
1108 			 * running the risk of having the mapping fail,
1109 			 * as it would be larger than the ELF file itself.
1110 			 *
1111 			 * As a result, only ET_DYN does this, since
1112 			 * some ET_EXEC (e.g. ia64) may have large virtual
1113 			 * memory holes between LOADs.
1114 			 *
1115 			 */
1116 			total_size = total_mapping_size(elf_phdata,
1117 							elf_ex->e_phnum);
1118 			if (!total_size) {
1119 				retval = -EINVAL;
1120 				goto out_free_dentry;
1121 			}
1122 
1123 			/* Calculate any requested alignment. */
1124 			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1125 
1126 			/**
1127 			 * DOC: PIE handling
1128 			 *
1129 			 * There are effectively two types of ET_DYN ELF
1130 			 * binaries: programs (i.e. PIE: ET_DYN with
1131 			 * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
1132 			 * without PT_INTERP, usually the ELF interpreter
1133 			 * itself). Loaders must be loaded away from programs
1134 			 * since the program may otherwise collide with the
1135 			 * loader (especially for ET_EXEC which does not have
1136 			 * a randomized position).
1137 			 *
1138 			 * For example, to handle invocations of
1139 			 * "./ld.so someprog" to test out a new version of
1140 			 * the loader, the subsequent program that the
1141 			 * loader loads must avoid the loader itself, so
1142 			 * they cannot share the same load range. Sufficient
1143 			 * room for the brk must be allocated with the
1144 			 * loader as well, since brk must be available with
1145 			 * the loader.
1146 			 *
1147 			 * Therefore, programs are loaded offset from
1148 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1149 			 * independently randomized mmap region (0 load_bias
1150 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1151 			 *
1152 			 * See below for "brk" handling details, which is
1153 			 * also affected by program vs loader and ASLR.
1154 			 */
1155 			if (interpreter) {
1156 				/* On ET_DYN with PT_INTERP, we do the ASLR. */
1157 				load_bias = ELF_ET_DYN_BASE;
1158 				if (current->flags & PF_RANDOMIZE)
1159 					load_bias += arch_mmap_rnd();
1160 				/* Adjust alignment as requested. */
1161 				if (alignment)
1162 					load_bias &= ~(alignment - 1);
1163 				elf_flags |= MAP_FIXED_NOREPLACE;
1164 			} else {
1165 				/*
1166 				 * For ET_DYN without PT_INTERP, we rely on
1167 				 * the architecture's (potentially ASLR) mmap
1168 				 * base address (via a load_bias of 0).
1169 				 *
1170 				 * When a large alignment is requested, we
1171 				 * must do the allocation at address "0" right
1172 				 * now to discover where things will load so
1173 				 * that we can adjust the resulting alignment.
1174 				 * In this case (load_bias != 0), we can use
1175 				 * MAP_FIXED_NOREPLACE to make sure the mapping
1176 				 * doesn't collide with anything.
1177 				 */
1178 				if (alignment > ELF_MIN_ALIGN) {
1179 					load_bias = elf_load(bprm->file, 0, elf_ppnt,
1180 							     elf_prot, elf_flags, total_size);
1181 					if (BAD_ADDR(load_bias)) {
1182 						retval = IS_ERR_VALUE(load_bias) ?
1183 							 PTR_ERR((void*)load_bias) : -EINVAL;
1184 						goto out_free_dentry;
1185 					}
1186 					vm_munmap(load_bias, total_size);
1187 					/* Adjust alignment as requested. */
1188 					if (alignment)
1189 						load_bias &= ~(alignment - 1);
1190 					elf_flags |= MAP_FIXED_NOREPLACE;
1191 				} else
1192 					load_bias = 0;
1193 			}
1194 
1195 			/*
1196 			 * Since load_bias is used for all subsequent loading
1197 			 * calculations, we must lower it by the first vaddr
1198 			 * so that the remaining calculations based on the
1199 			 * ELF vaddrs will be correctly offset. The result
1200 			 * is then page aligned.
1201 			 */
1202 			load_bias = ELF_PAGESTART(load_bias - vaddr);
1203 		}
1204 
1205 		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
1206 				elf_prot, elf_flags, total_size);
1207 		if (BAD_ADDR(error)) {
1208 			retval = IS_ERR_VALUE(error) ?
1209 				PTR_ERR((void*)error) : -EINVAL;
1210 			goto out_free_dentry;
1211 		}
1212 
1213 		if (first_pt_load) {
1214 			first_pt_load = 0;
1215 			if (elf_ex->e_type == ET_DYN) {
1216 				load_bias += error -
1217 				             ELF_PAGESTART(load_bias + vaddr);
1218 				reloc_func_desc = load_bias;
1219 			}
1220 		}
1221 
1222 		/*
1223 		 * Figure out which segment in the file contains the Program
1224 		 * Header table, and map to the associated memory address.
1225 		 */
1226 		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1227 		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1228 			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1229 				    elf_ppnt->p_vaddr;
1230 		}
1231 
1232 		k = elf_ppnt->p_vaddr;
1233 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1234 			start_code = k;
1235 		if (start_data < k)
1236 			start_data = k;
1237 
1238 		/*
1239 		 * Check to see if the section's size will overflow the
1240 		 * allowed task size. Note that p_filesz must always be
1241 		 * <= p_memsz so it is only necessary to check p_memsz.
1242 		 */
1243 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1244 		    elf_ppnt->p_memsz > TASK_SIZE ||
1245 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1246 			/* set_brk can never work. Avoid overflows. */
1247 			retval = -EINVAL;
1248 			goto out_free_dentry;
1249 		}
1250 
1251 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1252 
1253 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1254 			end_code = k;
1255 		if (end_data < k)
1256 			end_data = k;
1257 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1258 		if (k > elf_brk)
1259 			elf_brk = k;
1260 	}
1261 
1262 	e_entry = elf_ex->e_entry + load_bias;
1263 	phdr_addr += load_bias;
1264 	elf_brk += load_bias;
1265 	start_code += load_bias;
1266 	end_code += load_bias;
1267 	start_data += load_bias;
1268 	end_data += load_bias;
1269 
1270 	if (interpreter) {
1271 		elf_entry = load_elf_interp(interp_elf_ex,
1272 					    interpreter,
1273 					    load_bias, interp_elf_phdata,
1274 					    &arch_state);
1275 		if (!IS_ERR_VALUE(elf_entry)) {
1276 			/*
1277 			 * load_elf_interp() returns relocation
1278 			 * adjustment
1279 			 */
1280 			interp_load_addr = elf_entry;
1281 			elf_entry += interp_elf_ex->e_entry;
1282 		}
1283 		if (BAD_ADDR(elf_entry)) {
1284 			retval = IS_ERR_VALUE(elf_entry) ?
1285 					(int)elf_entry : -EINVAL;
1286 			goto out_free_dentry;
1287 		}
1288 		reloc_func_desc = interp_load_addr;
1289 
1290 		allow_write_access(interpreter);
1291 		fput(interpreter);
1292 
1293 		kfree(interp_elf_ex);
1294 		kfree(interp_elf_phdata);
1295 	} else {
1296 		elf_entry = e_entry;
1297 		if (BAD_ADDR(elf_entry)) {
1298 			retval = -EINVAL;
1299 			goto out_free_dentry;
1300 		}
1301 	}
1302 
1303 	kfree(elf_phdata);
1304 
1305 	set_binfmt(&elf_format);
1306 
1307 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1308 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1309 	if (retval < 0)
1310 		goto out;
1311 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1312 
1313 	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1314 				   e_entry, phdr_addr);
1315 	if (retval < 0)
1316 		goto out;
1317 
1318 	mm = current->mm;
1319 	mm->end_code = end_code;
1320 	mm->start_code = start_code;
1321 	mm->start_data = start_data;
1322 	mm->end_data = end_data;
1323 	mm->start_stack = bprm->p;
1324 
1325 	/**
1326 	 * DOC: "brk" handling
1327 	 *
1328 	 * For architectures with ELF randomization, when executing a
1329 	 * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
1330 	 * move the brk area out of the mmap region and into the unused
1331 	 * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
1332 	 * early with the stack growing down or other regions being put
1333 	 * into the mmap region by the kernel (e.g. vdso).
1334 	 *
1335 	 * In the CONFIG_COMPAT_BRK case, though, everything is turned
1336 	 * off because we're not allowed to move the brk at all.
1337 	 */
1338 	if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
1339 	    IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1340 	    elf_ex->e_type == ET_DYN && !interpreter) {
1341 		elf_brk = ELF_ET_DYN_BASE;
1342 		/* This counts as moving the brk, so let brk(2) know. */
1343 		brk_moved = true;
1344 	}
1345 	mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
1346 
1347 	if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
1348 		/*
1349 		 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
1350 		 * leave a gap between .bss and brk.
1351 		 */
1352 		if (!brk_moved)
1353 			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
1354 
1355 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1356 		brk_moved = true;
1357 	}
1358 
1359 #ifdef compat_brk_randomized
1360 	if (brk_moved)
1361 		current->brk_randomized = 1;
1362 #endif
1363 
1364 	if (current->personality & MMAP_PAGE_ZERO) {
1365 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1366 		   and some applications "depend" upon this behavior.
1367 		   Since we do not have the power to recompile these, we
1368 		   emulate the SVr4 behavior. Sigh. */
1369 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1370 				MAP_FIXED | MAP_PRIVATE, 0);
1371 	}
1372 
1373 	regs = current_pt_regs();
1374 #ifdef ELF_PLAT_INIT
1375 	/*
1376 	 * The ABI may specify that certain registers be set up in special
1377 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1378 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1379 	 * that the e_entry field is the address of the function descriptor
1380 	 * for the startup routine, rather than the address of the startup
1381 	 * routine itself.  This macro performs whatever initialization to
1382 	 * the regs structure is required as well as any relocations to the
1383 	 * function descriptor entries when executing dynamically linked apps.
1384 	 */
1385 	ELF_PLAT_INIT(regs, reloc_func_desc);
1386 #endif
1387 
1388 	finalize_exec(bprm);
1389 	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1390 	retval = 0;
1391 out:
1392 	return retval;
1393 
1394 	/* error cleanup */
1395 out_free_dentry:
1396 	kfree(interp_elf_ex);
1397 	kfree(interp_elf_phdata);
1398 out_free_file:
1399 	allow_write_access(interpreter);
1400 	if (interpreter)
1401 		fput(interpreter);
1402 out_free_ph:
1403 	kfree(elf_phdata);
1404 	goto out;
1405 }
1406 
1407 #ifdef CONFIG_USELIB
1408 /* This is really simpleminded and specialized - we are loading an
1409    a.out library that is given an ELF header. */
1410 static int load_elf_library(struct file *file)
1411 {
1412 	struct elf_phdr *elf_phdata;
1413 	struct elf_phdr *eppnt;
1414 	unsigned long elf_bss, bss, len;
1415 	int retval, error, i, j;
1416 	struct elfhdr elf_ex;
1417 
1418 	error = -ENOEXEC;
1419 	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1420 	if (retval < 0)
1421 		goto out;
1422 
1423 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1424 		goto out;
1425 
1426 	/* First of all, some simple consistency checks */
1427 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1428 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1429 		goto out;
1430 	if (elf_check_fdpic(&elf_ex))
1431 		goto out;
1432 
1433 	/* Now read in all of the header information */
1434 
1435 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1436 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1437 
1438 	error = -ENOMEM;
1439 	elf_phdata = kmalloc(j, GFP_KERNEL);
1440 	if (!elf_phdata)
1441 		goto out;
1442 
1443 	eppnt = elf_phdata;
1444 	error = -ENOEXEC;
1445 	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1446 	if (retval < 0)
1447 		goto out_free_ph;
1448 
1449 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1450 		if ((eppnt + i)->p_type == PT_LOAD)
1451 			j++;
1452 	if (j != 1)
1453 		goto out_free_ph;
1454 
1455 	while (eppnt->p_type != PT_LOAD)
1456 		eppnt++;
1457 
1458 	/* Now use mmap to map the library into memory. */
1459 	error = vm_mmap(file,
1460 			ELF_PAGESTART(eppnt->p_vaddr),
1461 			(eppnt->p_filesz +
1462 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1463 			PROT_READ | PROT_WRITE | PROT_EXEC,
1464 			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
1465 			(eppnt->p_offset -
1466 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1467 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1468 		goto out_free_ph;
1469 
1470 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1471 	if (padzero(elf_bss)) {
1472 		error = -EFAULT;
1473 		goto out_free_ph;
1474 	}
1475 
1476 	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1477 	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1478 	if (bss > len) {
1479 		error = vm_brk(len, bss - len);
1480 		if (error)
1481 			goto out_free_ph;
1482 	}
1483 	error = 0;
1484 
1485 out_free_ph:
1486 	kfree(elf_phdata);
1487 out:
1488 	return error;
1489 }
1490 #endif /* #ifdef CONFIG_USELIB */
1491 
1492 #ifdef CONFIG_ELF_CORE
1493 /*
1494  * ELF core dumper
1495  *
1496  * Modelled on fs/exec.c:aout_core_dump()
1497  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1498  */
1499 
1500 /* An ELF note in memory */
1501 struct memelfnote
1502 {
1503 	const char *name;
1504 	int type;
1505 	unsigned int datasz;
1506 	void *data;
1507 };
1508 
1509 static int notesize(struct memelfnote *en)
1510 {
1511 	int sz;
1512 
1513 	sz = sizeof(struct elf_note);
1514 	sz += roundup(strlen(en->name) + 1, 4);
1515 	sz += roundup(en->datasz, 4);
1516 
1517 	return sz;
1518 }
1519 
1520 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1521 {
1522 	struct elf_note en;
1523 	en.n_namesz = strlen(men->name) + 1;
1524 	en.n_descsz = men->datasz;
1525 	en.n_type = men->type;
1526 
1527 	return dump_emit(cprm, &en, sizeof(en)) &&
1528 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1529 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1530 }
1531 
1532 static void fill_elf_header(struct elfhdr *elf, int segs,
1533 			    u16 machine, u32 flags)
1534 {
1535 	memset(elf, 0, sizeof(*elf));
1536 
1537 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1538 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1539 	elf->e_ident[EI_DATA] = ELF_DATA;
1540 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1541 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1542 
1543 	elf->e_type = ET_CORE;
1544 	elf->e_machine = machine;
1545 	elf->e_version = EV_CURRENT;
1546 	elf->e_phoff = sizeof(struct elfhdr);
1547 	elf->e_flags = flags;
1548 	elf->e_ehsize = sizeof(struct elfhdr);
1549 	elf->e_phentsize = sizeof(struct elf_phdr);
1550 	elf->e_phnum = segs;
1551 }
1552 
1553 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1554 {
1555 	phdr->p_type = PT_NOTE;
1556 	phdr->p_offset = offset;
1557 	phdr->p_vaddr = 0;
1558 	phdr->p_paddr = 0;
1559 	phdr->p_filesz = sz;
1560 	phdr->p_memsz = 0;
1561 	phdr->p_flags = 0;
1562 	phdr->p_align = 4;
1563 }
1564 
1565 static void fill_note(struct memelfnote *note, const char *name, int type,
1566 		unsigned int sz, void *data)
1567 {
1568 	note->name = name;
1569 	note->type = type;
1570 	note->datasz = sz;
1571 	note->data = data;
1572 }
1573 
1574 /*
1575  * fill up all the fields in prstatus from the given task struct, except
1576  * registers which need to be filled up separately.
1577  */
1578 static void fill_prstatus(struct elf_prstatus_common *prstatus,
1579 		struct task_struct *p, long signr)
1580 {
1581 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1582 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1583 	prstatus->pr_sighold = p->blocked.sig[0];
1584 	rcu_read_lock();
1585 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1586 	rcu_read_unlock();
1587 	prstatus->pr_pid = task_pid_vnr(p);
1588 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1589 	prstatus->pr_sid = task_session_vnr(p);
1590 	if (thread_group_leader(p)) {
1591 		struct task_cputime cputime;
1592 
1593 		/*
1594 		 * This is the record for the group leader.  It shows the
1595 		 * group-wide total, not its individual thread total.
1596 		 */
1597 		thread_group_cputime(p, &cputime);
1598 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1599 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1600 	} else {
1601 		u64 utime, stime;
1602 
1603 		task_cputime(p, &utime, &stime);
1604 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1605 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1606 	}
1607 
1608 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1609 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1610 }
1611 
1612 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1613 		       struct mm_struct *mm)
1614 {
1615 	const struct cred *cred;
1616 	unsigned int i, len;
1617 	unsigned int state;
1618 
1619 	/* first copy the parameters from user space */
1620 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1621 
1622 	len = mm->arg_end - mm->arg_start;
1623 	if (len >= ELF_PRARGSZ)
1624 		len = ELF_PRARGSZ-1;
1625 	if (copy_from_user(&psinfo->pr_psargs,
1626 		           (const char __user *)mm->arg_start, len))
1627 		return -EFAULT;
1628 	for(i = 0; i < len; i++)
1629 		if (psinfo->pr_psargs[i] == 0)
1630 			psinfo->pr_psargs[i] = ' ';
1631 	psinfo->pr_psargs[len] = 0;
1632 
1633 	rcu_read_lock();
1634 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1635 	rcu_read_unlock();
1636 	psinfo->pr_pid = task_pid_vnr(p);
1637 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1638 	psinfo->pr_sid = task_session_vnr(p);
1639 
1640 	state = READ_ONCE(p->__state);
1641 	i = state ? ffz(~state) + 1 : 0;
1642 	psinfo->pr_state = i;
1643 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1644 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1645 	psinfo->pr_nice = task_nice(p);
1646 	psinfo->pr_flag = p->flags;
1647 	rcu_read_lock();
1648 	cred = __task_cred(p);
1649 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1650 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1651 	rcu_read_unlock();
1652 	get_task_comm(psinfo->pr_fname, p);
1653 
1654 	return 0;
1655 }
1656 
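/*
 * Point the NT_AUXV note at mm->saved_auxv; the loop counts entries two
 * words at a time, including the terminating AT_NULL pair.
 */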
1657 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1658 {
1659 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1660 	int i = 0;
1661 	do
1662 		i += 2;
1663 	while (auxv[i - 2] != AT_NULL);
1664 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1665 }
1666 
1667 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1668 		const kernel_siginfo_t *siginfo)
1669 {
1670 	copy_siginfo_to_external(csigdata, siginfo);
1671 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1672 }
1673 
1674 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1675 /*
1676  * Format of NT_FILE note:
1677  *
1678  * long count     -- how many files are mapped
1679  * long page_size -- units for file_ofs
1680  * array of [COUNT] elements of
1681  *   long start
1682  *   long end
1683  *   long file_ofs
1684  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1685  */
1686 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1687 {
1688 	unsigned count, size, names_ofs, remaining, n;
1689 	user_long_t *data;
1690 	user_long_t *start_end_ofs;
1691 	char *name_base, *name_curpos;
1692 	int i;
1693 
1694 	/* *Estimated* file count and total data size needed */
1695 	count = cprm->vma_count;
1696 	if (count > UINT_MAX / 64)
1697 		return -EINVAL;
1698 	size = count * 64;
1699 
1700 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1701  alloc:
1702 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1703 		return -EINVAL;
1704 	size = round_up(size, PAGE_SIZE);
1705 	/*
1706 	 * "size" can legitimately be 0 here (nothing to describe); treat the
1707 	 * resulting ZERO_SIZE_PTR as -ENOMEM and omit the NT_FILE note, which would be empty anyway.
1708 	 */
1709 	data = kvmalloc(size, GFP_KERNEL);
1710 	if (ZERO_OR_NULL_PTR(data))
1711 		return -ENOMEM;
1712 
1713 	start_end_ofs = data + 2;
1714 	name_base = name_curpos = ((char *)data) + names_ofs;
1715 	remaining = size - names_ofs;
1716 	count = 0;
1717 	for (i = 0; i < cprm->vma_count; i++) {
1718 		struct core_vma_metadata *m = &cprm->vma_meta[i];
1719 		struct file *file;
1720 		const char *filename;
1721 
1722 		file = m->file;
1723 		if (!file)
1724 			continue;
1725 		filename = file_path(file, name_curpos, remaining);
1726 		if (IS_ERR(filename)) {
1727 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1728 				kvfree(data);
1729 				size = size * 5 / 4;
1730 				goto alloc;
1731 			}
1732 			continue;
1733 		}
1734 
1735 		/* file_path() fills at the end, move name down */
1736 		/* n = strlen(filename) + 1: */
1737 		n = (name_curpos + remaining) - filename;
1738 		remaining = filename - name_curpos;
1739 		memmove(name_curpos, filename, n);
1740 		name_curpos += n;
1741 
1742 		*start_end_ofs++ = m->start;
1743 		*start_end_ofs++ = m->end;
1744 		*start_end_ofs++ = m->pgoff;
1745 		count++;
1746 	}
1747 
1748 	/* Now we know the exact count of mapped files, store it */
1749 	data[0] = count;
1750 	data[1] = PAGE_SIZE;
1751 	/*
1752 	 * count is usually less than cprm->vma_count (mappings without a
1753 	 * backing file were skipped), so close the gap and move the filenames down.
1754 	 */
1755 	n = cprm->vma_count - count;
1756 	if (n != 0) {
1757 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1758 		memmove(name_base - shift_bytes, name_base,
1759 			name_curpos - name_base);
1760 		name_curpos -= shift_bytes;
1761 	}
1762 
1763 	size = name_curpos - (char *)data;
1764 	fill_note(note, "CORE", NT_FILE, size, data);
1765 	return 0;
1766 }
1767 
1768 #include <linux/regset.h>
1769 
1770 struct elf_thread_core_info {
1771 	struct elf_thread_core_info *next;
1772 	struct task_struct *task;
1773 	struct elf_prstatus prstatus;
1774 	struct memelfnote notes[];
1775 };
1776 
1777 struct elf_note_info {
1778 	struct elf_thread_core_info *thread;
1779 	struct memelfnote psinfo;
1780 	struct memelfnote signote;
1781 	struct memelfnote auxv;
1782 	struct memelfnote files;
1783 	user_siginfo_t csigdata;
1784 	size_t size;
1785 	int thread_notes;
1786 };
1787 
1788 #ifdef CORE_DUMP_USE_REGSET
1789 /*
1790  * When a regset has a writeback hook, we call it on each thread before
1791  * dumping user memory.  On register window machines, this makes sure the
1792  * user memory backing the register data is up to date before we read it.
1793  */
1794 static void do_thread_regset_writeback(struct task_struct *task,
1795 				       const struct user_regset *regset)
1796 {
1797 	if (regset->writeback)
1798 		regset->writeback(task, regset, 1);
1799 }
1800 
1801 #ifndef PRSTATUS_SIZE
1802 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1803 #endif
1804 
1805 #ifndef SET_PR_FPVALID
1806 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1807 #endif
1808 
1809 static int fill_thread_core_info(struct elf_thread_core_info *t,
1810 				 const struct user_regset_view *view,
1811 				 long signr, struct elf_note_info *info)
1812 {
1813 	unsigned int note_iter, view_iter;
1814 
1815 	/*
1816 	 * NT_PRSTATUS is the one special case, because the regset data
1817 	 * goes into the pr_reg field inside the note contents, rather
1818 	 * than being the whole note contents.  We fill the regset in here.
1819 	 * We assume that regset 0 is NT_PRSTATUS.
1820 	 */
1821 	fill_prstatus(&t->prstatus.common, t->task, signr);
1822 	regset_get(t->task, &view->regsets[0],
1823 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1824 
1825 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1826 		  PRSTATUS_SIZE, &t->prstatus);
1827 	info->size += notesize(&t->notes[0]);
1828 
1829 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1830 
1831 	/*
1832 	 * Every other regset may generate a note too.  Skip any regset
1833 	 * that has no core_note_type or that reports itself inactive.
1834 	 */
1835 	note_iter = 1;
1836 	for (view_iter = 1; view_iter < view->n; ++view_iter) {
1837 		const struct user_regset *regset = &view->regsets[view_iter];
1838 		int note_type = regset->core_note_type;
1839 		bool is_fpreg = note_type == NT_PRFPREG;
1840 		void *data;
1841 		int ret;
1842 
1843 		do_thread_regset_writeback(t->task, regset);
1844 		if (!note_type) // not for coredumps
1845 			continue;
1846 		if (regset->active && regset->active(t->task, regset) <= 0)
1847 			continue;
1848 
1849 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1850 		if (ret < 0)
1851 			continue;
1852 
1853 		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1854 			break;
1855 
1856 		if (is_fpreg)
1857 			SET_PR_FPVALID(&t->prstatus);
1858 
1859 		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
1860 			  note_type, ret, data);
1861 
1862 		info->size += notesize(&t->notes[note_iter]);
1863 		note_iter++;
1864 	}
1865 
1866 	return 1;
1867 }
1868 #else
1869 static int fill_thread_core_info(struct elf_thread_core_info *t,
1870 				 const struct user_regset_view *view,
1871 				 long signr, struct elf_note_info *info)
1872 {
1873 	struct task_struct *p = t->task;
1874 	elf_fpregset_t *fpu;
1875 
1876 	fill_prstatus(&t->prstatus.common, p, signr);
1877 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1878 
1879 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1880 		  &(t->prstatus));
1881 	info->size += notesize(&t->notes[0]);
1882 
1883 	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
1884 	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
1885 		kfree(fpu);
1886 		return 1;
1887 	}
1888 
1889 	t->prstatus.pr_fpvalid = 1;
1890 	fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1891 	info->size += notesize(&t->notes[1]);
1892 
1893 	return 1;
1894 }
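/*
 * In this non-regset fallback each thread contributes at most two notes,
 * NT_PRSTATUS and NT_PRFPREG, which is why fill_note_info() hard-codes
 * info->thread_notes = 2 when CORE_DUMP_USE_REGSET is not defined.
 */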
1895 #endif
1896 
1897 static int fill_note_info(struct elfhdr *elf, int phdrs,
1898 			  struct elf_note_info *info,
1899 			  struct coredump_params *cprm)
1900 {
1901 	struct task_struct *dump_task = current;
1902 	const struct user_regset_view *view;
1903 	struct elf_thread_core_info *t;
1904 	struct elf_prpsinfo *psinfo;
1905 	struct core_thread *ct;
1906 
1907 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1908 	if (!psinfo)
1909 		return 0;
1910 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1911 
1912 #ifdef CORE_DUMP_USE_REGSET
1913 	view = task_user_regset_view(dump_task);
1914 
1915 	/*
1916 	 * Figure out how many notes we're going to need for each thread.
1917 	 */
1918 	info->thread_notes = 0;
1919 	for (int i = 0; i < view->n; ++i)
1920 		if (view->regsets[i].core_note_type != 0)
1921 			++info->thread_notes;
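	/*
	 * For illustration: if the architecture's view exposes, say, a
	 * general-purpose, an FP and one vector regset, each with a
	 * core_note_type, this loop yields thread_notes == 3 and every
	 * thread's notes[] array below is sized for three entries.
	 */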
1922 
1923 	/*
1924 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1925 	 * since it is our one special case.
1926 	 */
1927 	if (unlikely(info->thread_notes == 0) ||
1928 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1929 		WARN_ON(1);
1930 		return 0;
1931 	}
1932 
1933 	/*
1934 	 * Initialize the ELF file header.
1935 	 */
1936 	fill_elf_header(elf, phdrs,
1937 			view->e_machine, view->e_flags);
1938 #else
1939 	view = NULL;
1940 	info->thread_notes = 2;
1941 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1942 #endif
1943 
1944 	/*
1945 	 * Allocate a structure for each thread.
1946 	 */
1947 	info->thread = kzalloc(offsetof(struct elf_thread_core_info,
1948 				     notes[info->thread_notes]),
1949 			    GFP_KERNEL);
1950 	if (unlikely(!info->thread))
1951 		return 0;
1952 
1953 	info->thread->task = dump_task;
1954 	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
1955 		t = kzalloc(offsetof(struct elf_thread_core_info,
1956 				     notes[info->thread_notes]),
1957 			    GFP_KERNEL);
1958 		if (unlikely(!t))
1959 			return 0;
1960 
1961 		t->task = ct->task;
1962 		t->next = info->thread->next;
1963 		info->thread->next = t;
1964 	}
1965 
1966 	/*
1967 	 * Now fill in each thread's information.
1968 	 */
1969 	for (t = info->thread; t != NULL; t = t->next)
1970 		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
1971 			return 0;
1972 
1973 	/*
1974 	 * Fill in the two process-wide notes.
1975 	 */
1976 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1977 	info->size += notesize(&info->psinfo);
1978 
1979 	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
1980 	info->size += notesize(&info->signote);
1981 
1982 	fill_auxv_note(&info->auxv, current->mm);
1983 	info->size += notesize(&info->auxv);
1984 
1985 	if (fill_files_note(&info->files, cprm) == 0)
1986 		info->size += notesize(&info->files);
1987 
1988 	return 1;
1989 }
1990 
1991 /*
1992  * Write all the notes for each thread.  When writing the first thread, the
1993  * process-wide notes are interleaved after the first thread-specific note.
1994  */
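/*
 * For a two-thread dump with an NT_FILE note this produces, for example:
 *
 *   NT_PRSTATUS (thread 0), NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *   <thread 0's remaining per-thread notes>,
 *   NT_PRSTATUS (thread 1), <thread 1's remaining per-thread notes>, ...
 */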
1995 static int write_note_info(struct elf_note_info *info,
1996 			   struct coredump_params *cprm)
1997 {
1998 	bool first = true;
1999 	struct elf_thread_core_info *t = info->thread;
2000 
2001 	do {
2002 		int i;
2003 
2004 		if (!writenote(&t->notes[0], cprm))
2005 			return 0;
2006 
2007 		if (first && !writenote(&info->psinfo, cprm))
2008 			return 0;
2009 		if (first && !writenote(&info->signote, cprm))
2010 			return 0;
2011 		if (first && !writenote(&info->auxv, cprm))
2012 			return 0;
2013 		if (first && info->files.data &&
2014 				!writenote(&info->files, cprm))
2015 			return 0;
2016 
2017 		for (i = 1; i < info->thread_notes; ++i)
2018 			if (t->notes[i].data &&
2019 			    !writenote(&t->notes[i], cprm))
2020 				return 0;
2021 
2022 		first = false;
2023 		t = t->next;
2024 	} while (t);
2025 
2026 	return 1;
2027 }
2028 
2029 static void free_note_info(struct elf_note_info *info)
2030 {
2031 	struct elf_thread_core_info *threads = info->thread;
2032 	while (threads) {
2033 		unsigned int i;
2034 		struct elf_thread_core_info *t = threads;
2035 		threads = t->next;
2036 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
2037 		for (i = 1; i < info->thread_notes; ++i)
2038 			kfree(t->notes[i].data);
2039 		kfree(t);
2040 	}
2041 	kfree(info->psinfo.data);
2042 	kvfree(info->files.data);
2043 }
2044 
2045 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2046 			     elf_addr_t e_shoff, int segs)
2047 {
2048 	elf->e_shoff = e_shoff;
2049 	elf->e_shentsize = sizeof(*shdr4extnum);
2050 	elf->e_shnum = 1;
2051 	elf->e_shstrndx = SHN_UNDEF;
2052 
2053 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2054 
2055 	shdr4extnum->sh_type = SHT_NULL;
2056 	shdr4extnum->sh_size = elf->e_shnum;
2057 	shdr4extnum->sh_link = elf->e_shstrndx;
2058 	shdr4extnum->sh_info = segs;
2059 }
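/*
 * Extended numbering in practice (values illustrative): a dump with
 * 70 000 PT_LOAD segments plus the note segment, ignoring any arch-specific
 * extra phdrs, has segs == 70 001 > PN_XNUM, so e_phnum is set to the escape
 * value PN_XNUM (0xffff) and the true count ends up in sh_info of this lone
 * SHT_NULL section header, where readers such as readelf recover it.
 */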
2060 
2061 /*
2062  * Actual dumper
2063  *
2064  * This is a two-pass process; first we find the offsets of the bits,
2065  * and then they are actually written out.  If we hit the core file
2066  * size limit (RLIMIT_CORE) we just truncate.
2067  */
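/*
 * Rough shape of the file written below (offsets illustrative, "extra"
 * pieces only present on architectures that supply them):
 *
 *   ELF header
 *   program headers: one PT_NOTE entry, one PT_LOAD entry per VMA, extra phdrs
 *   note data assembled by fill_note_info()
 *   padding up to an ELF_EXEC_PAGESIZE boundary (dataoff)
 *   VMA contents, one block per PT_LOAD, followed by extra data
 *   a single section header at e_shoff, only when e_phnum == PN_XNUM
 */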
2068 static int elf_core_dump(struct coredump_params *cprm)
2069 {
2070 	int has_dumped = 0;
2071 	int segs, i;
2072 	struct elfhdr elf;
2073 	loff_t offset = 0, dataoff;
2074 	struct elf_note_info info = { };
2075 	struct elf_phdr *phdr4note = NULL;
2076 	struct elf_shdr *shdr4extnum = NULL;
2077 	Elf_Half e_phnum;
2078 	elf_addr_t e_shoff;
2079 
2080 	/*
2081 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2082 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2083 	 */
2084 	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
2085 
2086 	/* for notes section */
2087 	segs++;
2088 
2089 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2090 	 * this, the kernel supports extended numbering. Have a look at
2091 	 * include/linux/elf.h for further information. */
2092 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2093 
2094 	/*
2095 	 * Collect all the non-memory information about the process for the
2096 	 * notes.  This also sets up the file header.
2097 	 */
2098 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2099 		goto end_coredump;
2100 
2101 	has_dumped = 1;
2102 
2103 	offset += sizeof(elf);				/* ELF header */
2104 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2105 
2106 	/* Write notes phdr entry */
2107 	{
2108 		size_t sz = info.size;
2109 
2110 		/* For cell spufs */
2111 		sz += elf_coredump_extra_notes_size();
2112 
2113 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2114 		if (!phdr4note)
2115 			goto end_coredump;
2116 
2117 		fill_elf_note_phdr(phdr4note, sz, offset);
2118 		offset += sz;
2119 	}
2120 
2121 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2122 
2123 	offset += cprm->vma_data_size;
2124 	offset += elf_core_extra_data_size(cprm);
2125 	e_shoff = offset;
2126 
2127 	if (e_phnum == PN_XNUM) {
2128 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2129 		if (!shdr4extnum)
2130 			goto end_coredump;
2131 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2132 	}
2133 
2134 	offset = dataoff;
2135 
2136 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2137 		goto end_coredump;
2138 
2139 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2140 		goto end_coredump;
2141 
2142 	/* Write a program header for each memory segment to be dumped */
2143 	for (i = 0; i < cprm->vma_count; i++) {
2144 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2145 		struct elf_phdr phdr;
2146 
2147 		phdr.p_type = PT_LOAD;
2148 		phdr.p_offset = offset;
2149 		phdr.p_vaddr = meta->start;
2150 		phdr.p_paddr = 0;
2151 		phdr.p_filesz = meta->dump_size;
2152 		phdr.p_memsz = meta->end - meta->start;
2153 		offset += phdr.p_filesz;
2154 		phdr.p_flags = 0;
2155 		if (meta->flags & VM_READ)
2156 			phdr.p_flags |= PF_R;
2157 		if (meta->flags & VM_WRITE)
2158 			phdr.p_flags |= PF_W;
2159 		if (meta->flags & VM_EXEC)
2160 			phdr.p_flags |= PF_X;
2161 		phdr.p_align = ELF_EXEC_PAGESIZE;
2162 
2163 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2164 			goto end_coredump;
2165 	}
2166 
2167 	if (!elf_core_write_extra_phdrs(cprm, offset))
2168 		goto end_coredump;
2169 
2170 	/* write out the notes section */
2171 	if (!write_note_info(&info, cprm))
2172 		goto end_coredump;
2173 
2174 	/* For cell spufs */
2175 	if (elf_coredump_extra_notes_write(cprm))
2176 		goto end_coredump;
2177 
2178 	/* Align to page */
2179 	dump_skip_to(cprm, dataoff);
2180 
2181 	for (i = 0; i < cprm->vma_count; i++) {
2182 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2183 
2184 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2185 			goto end_coredump;
2186 	}
2187 
2188 	if (!elf_core_write_extra_data(cprm))
2189 		goto end_coredump;
2190 
2191 	if (e_phnum == PN_XNUM) {
2192 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2193 			goto end_coredump;
2194 	}
2195 
2196 end_coredump:
2197 	free_note_info(&info);
2198 	kfree(shdr4extnum);
2199 	kfree(phdr4note);
2200 	return has_dumped;
2201 }
2202 
2203 #endif		/* CONFIG_ELF_CORE */
2204 
2205 static int __init init_elf_binfmt(void)
2206 {
2207 	register_binfmt(&elf_format);
2208 	return 0;
2209 }
2210 
2211 static void __exit exit_elf_binfmt(void)
2212 {
2213 	/* Remove the ELF loader. */
2214 	unregister_binfmt(&elf_format);
2215 }
2216 
2217 core_initcall(init_elf_binfmt);
2218 module_exit(exit_elf_binfmt);
2219 
2220 #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
2221 #include "binfmt_elf_test.c"
2222 #endif
2223