xref: /openbmc/linux/fs/binfmt_elf.c (revision 7b73a9c8e26ce5769c41d4b787767c10fe7269db)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/mm.h>
17 #include <linux/mman.h>
18 #include <linux/errno.h>
19 #include <linux/signal.h>
20 #include <linux/binfmts.h>
21 #include <linux/string.h>
22 #include <linux/file.h>
23 #include <linux/slab.h>
24 #include <linux/personality.h>
25 #include <linux/elfcore.h>
26 #include <linux/init.h>
27 #include <linux/highuid.h>
28 #include <linux/compiler.h>
29 #include <linux/highmem.h>
30 #include <linux/pagemap.h>
31 #include <linux/vmalloc.h>
32 #include <linux/security.h>
33 #include <linux/random.h>
34 #include <linux/elf.h>
35 #include <linux/elf-randomize.h>
36 #include <linux/utsname.h>
37 #include <linux/coredump.h>
38 #include <linux/sched.h>
39 #include <linux/sched/coredump.h>
40 #include <linux/sched/task_stack.h>
41 #include <linux/sched/cputime.h>
42 #include <linux/cred.h>
43 #include <linux/dax.h>
44 #include <linux/uaccess.h>
45 #include <asm/param.h>
46 #include <asm/page.h>
47 
48 #ifndef user_long_t
49 #define user_long_t long
50 #endif
51 #ifndef user_siginfo_t
52 #define user_siginfo_t siginfo_t
53 #endif
54 
55 /* That's for binfmt_elf_fdpic to deal with */
56 #ifndef elf_check_fdpic
57 #define elf_check_fdpic(ex) false
58 #endif
59 
60 static int load_elf_binary(struct linux_binprm *bprm);
61 
62 #ifdef CONFIG_USELIB
63 static int load_elf_library(struct file *);
64 #else
65 #define load_elf_library NULL
66 #endif
67 
68 /*
69  * If we don't support core dumping, then supply a NULL so we
70  * don't even try.
71  */
72 #ifdef CONFIG_ELF_CORE
73 static int elf_core_dump(struct coredump_params *cprm);
74 #else
75 #define elf_core_dump	NULL
76 #endif
77 
78 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
79 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
80 #else
81 #define ELF_MIN_ALIGN	PAGE_SIZE
82 #endif
83 
84 #ifndef ELF_CORE_EFLAGS
85 #define ELF_CORE_EFLAGS	0
86 #endif
87 
88 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
89 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
90 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
91 
92 static struct linux_binfmt elf_format = {
93 	.module		= THIS_MODULE,
94 	.load_binary	= load_elf_binary,
95 	.load_shlib	= load_elf_library,
96 	.core_dump	= elf_core_dump,
97 	.min_coredump	= ELF_EXEC_PAGESIZE,
98 };
99 
100 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
101 
102 static int set_brk(unsigned long start, unsigned long end, int prot)
103 {
104 	start = ELF_PAGEALIGN(start);
105 	end = ELF_PAGEALIGN(end);
106 	if (end > start) {
107 		/*
108 		 * Map the last of the bss segment.
109 		 * If the header is requesting these pages to be
110 		 * executable, honour that (ppc32 needs this).
111 		 */
112 		int error = vm_brk_flags(start, end - start,
113 				prot & PROT_EXEC ? VM_EXEC : 0);
114 		if (error)
115 			return error;
116 	}
117 	current->mm->start_brk = current->mm->brk = end;
118 	return 0;
119 }
120 
121 /* We need to explicitly zero any fractional pages
122    after the data section (i.e. bss).  This would
123    contain the junk from the file that should not
124    be in memory
125  */
126 static int padzero(unsigned long elf_bss)
127 {
128 	unsigned long nbyte;
129 
130 	nbyte = ELF_PAGEOFFSET(elf_bss);
131 	if (nbyte) {
132 		nbyte = ELF_MIN_ALIGN - nbyte;
133 		if (clear_user((void __user *) elf_bss, nbyte))
134 			return -EFAULT;
135 	}
136 	return 0;
137 }
138 
/*
 * Stack manipulation helpers, with one variant per stack growth direction.
 *
 * STACK_ADD(sp, items)   - address @items elf_addr_t slots further into the
 *                          stack than @sp.
 * STACK_ROUND(sp, items) - address @items slots further into the stack than
 *                          @sp, rounded to a 16-byte boundary, as an
 *                          unsigned long.
 * STACK_ALLOC(sp, len)   - reserve @len units on the stack by updating @sp
 *                          in place; evaluates to the start of the reserved
 *                          area.
 *
 * Every macro argument is parenthesized in the expansion so that compound
 * expressions such as "argc + 1" are not re-associated by operator
 * precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
153 
154 #ifndef ELF_BASE_PLATFORM
155 /*
156  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
157  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
158  * will be copied to the user stack in the same manner as AT_PLATFORM.
159  */
160 #define ELF_BASE_PLATFORM NULL
161 #endif
162 
163 static int
164 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
165 		unsigned long load_addr, unsigned long interp_load_addr)
166 {
167 	unsigned long p = bprm->p;
168 	int argc = bprm->argc;
169 	int envc = bprm->envc;
170 	elf_addr_t __user *sp;
171 	elf_addr_t __user *u_platform;
172 	elf_addr_t __user *u_base_platform;
173 	elf_addr_t __user *u_rand_bytes;
174 	const char *k_platform = ELF_PLATFORM;
175 	const char *k_base_platform = ELF_BASE_PLATFORM;
176 	unsigned char k_rand_bytes[16];
177 	int items;
178 	elf_addr_t *elf_info;
179 	int ei_index = 0;
180 	const struct cred *cred = current_cred();
181 	struct vm_area_struct *vma;
182 
183 	/*
184 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
185 	 * evictions by the processes running on the same package. One
186 	 * thing we can do is to shuffle the initial stack for them.
187 	 */
188 
189 	p = arch_align_stack(p);
190 
191 	/*
192 	 * If this architecture has a platform capability string, copy it
193 	 * to userspace.  In some cases (Sparc), this info is impossible
194 	 * for userspace to get any other way, in others (i386) it is
195 	 * merely difficult.
196 	 */
197 	u_platform = NULL;
198 	if (k_platform) {
199 		size_t len = strlen(k_platform) + 1;
200 
201 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 		if (__copy_to_user(u_platform, k_platform, len))
203 			return -EFAULT;
204 	}
205 
206 	/*
207 	 * If this architecture has a "base" platform capability
208 	 * string, copy it to userspace.
209 	 */
210 	u_base_platform = NULL;
211 	if (k_base_platform) {
212 		size_t len = strlen(k_base_platform) + 1;
213 
214 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
215 		if (__copy_to_user(u_base_platform, k_base_platform, len))
216 			return -EFAULT;
217 	}
218 
219 	/*
220 	 * Generate 16 random bytes for userspace PRNG seeding.
221 	 */
222 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
223 	u_rand_bytes = (elf_addr_t __user *)
224 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
225 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
226 		return -EFAULT;
227 
228 	/* Create the ELF interpreter info */
229 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
230 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
231 #define NEW_AUX_ENT(id, val) \
232 	do { \
233 		elf_info[ei_index++] = id; \
234 		elf_info[ei_index++] = val; \
235 	} while (0)
236 
237 #ifdef ARCH_DLINFO
238 	/*
239 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
240 	 * AUXV.
241 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
242 	 * ARCH_DLINFO changes
243 	 */
244 	ARCH_DLINFO;
245 #endif
246 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
247 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
248 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
249 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
250 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
251 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
252 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
253 	NEW_AUX_ENT(AT_FLAGS, 0);
254 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
255 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
256 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
257 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
258 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
259 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
260 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
261 #ifdef ELF_HWCAP2
262 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
263 #endif
264 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
265 	if (k_platform) {
266 		NEW_AUX_ENT(AT_PLATFORM,
267 			    (elf_addr_t)(unsigned long)u_platform);
268 	}
269 	if (k_base_platform) {
270 		NEW_AUX_ENT(AT_BASE_PLATFORM,
271 			    (elf_addr_t)(unsigned long)u_base_platform);
272 	}
273 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
274 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
275 	}
276 #undef NEW_AUX_ENT
277 	/* AT_NULL is zero; clear the rest too */
278 	memset(&elf_info[ei_index], 0,
279 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
280 
281 	/* And advance past the AT_NULL entry.  */
282 	ei_index += 2;
283 
284 	sp = STACK_ADD(p, ei_index);
285 
286 	items = (argc + 1) + (envc + 1) + 1;
287 	bprm->p = STACK_ROUND(sp, items);
288 
289 	/* Point sp at the lowest address on the stack */
290 #ifdef CONFIG_STACK_GROWSUP
291 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
292 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
293 #else
294 	sp = (elf_addr_t __user *)bprm->p;
295 #endif
296 
297 
298 	/*
299 	 * Grow the stack manually; some architectures have a limit on how
300 	 * far ahead a user-space access may be in order to grow the stack.
301 	 */
302 	vma = find_extend_vma(current->mm, bprm->p);
303 	if (!vma)
304 		return -EFAULT;
305 
306 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
307 	if (__put_user(argc, sp++))
308 		return -EFAULT;
309 
310 	/* Populate list of argv pointers back to argv strings. */
311 	p = current->mm->arg_end = current->mm->arg_start;
312 	while (argc-- > 0) {
313 		size_t len;
314 		if (__put_user((elf_addr_t)p, sp++))
315 			return -EFAULT;
316 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
317 		if (!len || len > MAX_ARG_STRLEN)
318 			return -EINVAL;
319 		p += len;
320 	}
321 	if (__put_user(0, sp++))
322 		return -EFAULT;
323 	current->mm->arg_end = p;
324 
325 	/* Populate list of envp pointers back to envp strings. */
326 	current->mm->env_end = current->mm->env_start = p;
327 	while (envc-- > 0) {
328 		size_t len;
329 		if (__put_user((elf_addr_t)p, sp++))
330 			return -EFAULT;
331 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
332 		if (!len || len > MAX_ARG_STRLEN)
333 			return -EINVAL;
334 		p += len;
335 	}
336 	if (__put_user(0, sp++))
337 		return -EFAULT;
338 	current->mm->env_end = p;
339 
340 	/* Put the elf_info on the stack in the right place.  */
341 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
342 		return -EFAULT;
343 	return 0;
344 }
345 
346 #ifndef elf_map
347 
348 static unsigned long elf_map(struct file *filep, unsigned long addr,
349 		const struct elf_phdr *eppnt, int prot, int type,
350 		unsigned long total_size)
351 {
352 	unsigned long map_addr;
353 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
354 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
355 	addr = ELF_PAGESTART(addr);
356 	size = ELF_PAGEALIGN(size);
357 
358 	/* mmap() will return -EINVAL if given a zero size, but a
359 	 * segment with zero filesize is perfectly valid */
360 	if (!size)
361 		return addr;
362 
363 	/*
364 	* total_size is the size of the ELF (interpreter) image.
365 	* The _first_ mmap needs to know the full size, otherwise
366 	* randomization might put this image into an overlapping
367 	* position with the ELF binary image. (since size < total_size)
368 	* So we first map the 'big' image - and unmap the remainder at
369 	* the end. (which unmap is needed for ELF images with holes.)
370 	*/
371 	if (total_size) {
372 		total_size = ELF_PAGEALIGN(total_size);
373 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
374 		if (!BAD_ADDR(map_addr))
375 			vm_munmap(map_addr+size, total_size-size);
376 	} else
377 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
378 
379 	if ((type & MAP_FIXED_NOREPLACE) &&
380 	    PTR_ERR((void *)map_addr) == -EEXIST)
381 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
382 			task_pid_nr(current), current->comm, (void *)addr);
383 
384 	return(map_addr);
385 }
386 
387 #endif /* !elf_map */
388 
389 static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
390 {
391 	int i, first_idx = -1, last_idx = -1;
392 
393 	for (i = 0; i < nr; i++) {
394 		if (cmds[i].p_type == PT_LOAD) {
395 			last_idx = i;
396 			if (first_idx == -1)
397 				first_idx = i;
398 		}
399 	}
400 	if (first_idx == -1)
401 		return 0;
402 
403 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
404 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
405 }
406 
407 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
408 {
409 	ssize_t rv;
410 
411 	rv = kernel_read(file, buf, len, &pos);
412 	if (unlikely(rv != len)) {
413 		return (rv < 0) ? rv : -EIO;
414 	}
415 	return 0;
416 }
417 
418 /**
419  * load_elf_phdrs() - load ELF program headers
420  * @elf_ex:   ELF header of the binary whose program headers should be loaded
421  * @elf_file: the opened ELF binary file
422  *
423  * Loads ELF program headers from the binary file elf_file, which has the ELF
424  * header pointed to by elf_ex, into a newly allocated array. The caller is
425  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
426  */
427 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
428 				       struct file *elf_file)
429 {
430 	struct elf_phdr *elf_phdata = NULL;
431 	int retval, err = -1;
432 	unsigned int size;
433 
434 	/*
435 	 * If the size of this structure has changed, then punt, since
436 	 * we will be doing the wrong thing.
437 	 */
438 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
439 		goto out;
440 
441 	/* Sanity check the number of program headers... */
442 	/* ...and their total size. */
443 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
444 	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
445 		goto out;
446 
447 	elf_phdata = kmalloc(size, GFP_KERNEL);
448 	if (!elf_phdata)
449 		goto out;
450 
451 	/* Read in the program headers */
452 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
453 	if (retval < 0) {
454 		err = retval;
455 		goto out;
456 	}
457 
458 	/* Success! */
459 	err = 0;
460 out:
461 	if (err) {
462 		kfree(elf_phdata);
463 		elf_phdata = NULL;
464 	}
465 	return elf_phdata;
466 }
467 
468 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
469 
470 /**
471  * struct arch_elf_state - arch-specific ELF loading state
472  *
473  * This structure is used to preserve architecture specific data during
474  * the loading of an ELF file, throughout the checking of architecture
475  * specific ELF headers & through to the point where the ELF load is
476  * known to be proceeding (ie. SET_PERSONALITY).
477  *
478  * This implementation is a dummy for architectures which require no
479  * specific state.
480  */
481 struct arch_elf_state {
482 };
483 
484 #define INIT_ARCH_ELF_STATE {}
485 
486 /**
487  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
488  * @ehdr:	The main ELF header
489  * @phdr:	The program header to check
490  * @elf:	The open ELF file
491  * @is_interp:	True if the phdr is from the interpreter of the ELF being
492  *		loaded, else false.
493  * @state:	Architecture-specific state preserved throughout the process
494  *		of loading the ELF.
495  *
496  * Inspects the program header phdr to validate its correctness and/or
497  * suitability for the system. Called once per ELF program header in the
498  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
499  * interpreter.
500  *
501  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
502  *         with that return code.
503  */
504 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
505 				   struct elf_phdr *phdr,
506 				   struct file *elf, bool is_interp,
507 				   struct arch_elf_state *state)
508 {
509 	/* Dummy implementation, always proceed */
510 	return 0;
511 }
512 
513 /**
514  * arch_check_elf() - check an ELF executable
515  * @ehdr:	The main ELF header
516  * @has_interp:	True if the ELF has an interpreter, else false.
517  * @interp_ehdr: The interpreter's ELF header
518  * @state:	Architecture-specific state preserved throughout the process
519  *		of loading the ELF.
520  *
521  * Provides a final opportunity for architecture code to reject the loading
522  * of the ELF & cause an exec syscall to return an error. This is called after
523  * all program headers to be checked by arch_elf_pt_proc have been.
524  *
525  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
526  *         with that return code.
527  */
528 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
529 				 struct elfhdr *interp_ehdr,
530 				 struct arch_elf_state *state)
531 {
532 	/* Dummy implementation, always proceed */
533 	return 0;
534 }
535 
536 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
537 
538 static inline int make_prot(u32 p_flags)
539 {
540 	int prot = 0;
541 
542 	if (p_flags & PF_R)
543 		prot |= PROT_READ;
544 	if (p_flags & PF_W)
545 		prot |= PROT_WRITE;
546 	if (p_flags & PF_X)
547 		prot |= PROT_EXEC;
548 	return prot;
549 }
550 
551 /* This is much more generalized than the library routine read function,
552    so we keep this separate.  Technically the library read function
553    is only provided so that we can read a.out libraries that have
554    an ELF header */
555 
556 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
557 		struct file *interpreter,
558 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
559 {
560 	struct elf_phdr *eppnt;
561 	unsigned long load_addr = 0;
562 	int load_addr_set = 0;
563 	unsigned long last_bss = 0, elf_bss = 0;
564 	int bss_prot = 0;
565 	unsigned long error = ~0UL;
566 	unsigned long total_size;
567 	int i;
568 
569 	/* First of all, some simple consistency checks */
570 	if (interp_elf_ex->e_type != ET_EXEC &&
571 	    interp_elf_ex->e_type != ET_DYN)
572 		goto out;
573 	if (!elf_check_arch(interp_elf_ex) ||
574 	    elf_check_fdpic(interp_elf_ex))
575 		goto out;
576 	if (!interpreter->f_op->mmap)
577 		goto out;
578 
579 	total_size = total_mapping_size(interp_elf_phdata,
580 					interp_elf_ex->e_phnum);
581 	if (!total_size) {
582 		error = -EINVAL;
583 		goto out;
584 	}
585 
586 	eppnt = interp_elf_phdata;
587 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
588 		if (eppnt->p_type == PT_LOAD) {
589 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
590 			int elf_prot = make_prot(eppnt->p_flags);
591 			unsigned long vaddr = 0;
592 			unsigned long k, map_addr;
593 
594 			vaddr = eppnt->p_vaddr;
595 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
596 				elf_type |= MAP_FIXED_NOREPLACE;
597 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
598 				load_addr = -vaddr;
599 
600 			map_addr = elf_map(interpreter, load_addr + vaddr,
601 					eppnt, elf_prot, elf_type, total_size);
602 			total_size = 0;
603 			error = map_addr;
604 			if (BAD_ADDR(map_addr))
605 				goto out;
606 
607 			if (!load_addr_set &&
608 			    interp_elf_ex->e_type == ET_DYN) {
609 				load_addr = map_addr - ELF_PAGESTART(vaddr);
610 				load_addr_set = 1;
611 			}
612 
613 			/*
614 			 * Check to see if the section's size will overflow the
615 			 * allowed task size. Note that p_filesz must always be
616 			 * <= p_memsize so it's only necessary to check p_memsz.
617 			 */
618 			k = load_addr + eppnt->p_vaddr;
619 			if (BAD_ADDR(k) ||
620 			    eppnt->p_filesz > eppnt->p_memsz ||
621 			    eppnt->p_memsz > TASK_SIZE ||
622 			    TASK_SIZE - eppnt->p_memsz < k) {
623 				error = -ENOMEM;
624 				goto out;
625 			}
626 
627 			/*
628 			 * Find the end of the file mapping for this phdr, and
629 			 * keep track of the largest address we see for this.
630 			 */
631 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
632 			if (k > elf_bss)
633 				elf_bss = k;
634 
635 			/*
636 			 * Do the same thing for the memory mapping - between
637 			 * elf_bss and last_bss is the bss section.
638 			 */
639 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
640 			if (k > last_bss) {
641 				last_bss = k;
642 				bss_prot = elf_prot;
643 			}
644 		}
645 	}
646 
647 	/*
648 	 * Now fill out the bss section: first pad the last page from
649 	 * the file up to the page boundary, and zero it from elf_bss
650 	 * up to the end of the page.
651 	 */
652 	if (padzero(elf_bss)) {
653 		error = -EFAULT;
654 		goto out;
655 	}
656 	/*
657 	 * Next, align both the file and mem bss up to the page size,
658 	 * since this is where elf_bss was just zeroed up to, and where
659 	 * last_bss will end after the vm_brk_flags() below.
660 	 */
661 	elf_bss = ELF_PAGEALIGN(elf_bss);
662 	last_bss = ELF_PAGEALIGN(last_bss);
663 	/* Finally, if there is still more bss to allocate, do it. */
664 	if (last_bss > elf_bss) {
665 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
666 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
667 		if (error)
668 			goto out;
669 	}
670 
671 	error = load_addr;
672 out:
673 	return error;
674 }
675 
676 /*
677  * These are the functions used to load ELF style executables and shared
678  * libraries.  There is no binary dependent code anywhere else.
679  */
680 
681 static int load_elf_binary(struct linux_binprm *bprm)
682 {
683 	struct file *interpreter = NULL; /* to shut gcc up */
684  	unsigned long load_addr = 0, load_bias = 0;
685 	int load_addr_set = 0;
686 	unsigned long error;
687 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
688 	unsigned long elf_bss, elf_brk;
689 	int bss_prot = 0;
690 	int retval, i;
691 	unsigned long elf_entry;
692 	unsigned long interp_load_addr = 0;
693 	unsigned long start_code, end_code, start_data, end_data;
694 	unsigned long reloc_func_desc __maybe_unused = 0;
695 	int executable_stack = EXSTACK_DEFAULT;
696 	struct {
697 		struct elfhdr elf_ex;
698 		struct elfhdr interp_elf_ex;
699 	} *loc;
700 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
701 	struct pt_regs *regs;
702 
703 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
704 	if (!loc) {
705 		retval = -ENOMEM;
706 		goto out_ret;
707 	}
708 
709 	/* Get the exec-header */
710 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
711 
712 	retval = -ENOEXEC;
713 	/* First of all, some simple consistency checks */
714 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 		goto out;
716 
717 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
718 		goto out;
719 	if (!elf_check_arch(&loc->elf_ex))
720 		goto out;
721 	if (elf_check_fdpic(&loc->elf_ex))
722 		goto out;
723 	if (!bprm->file->f_op->mmap)
724 		goto out;
725 
726 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
727 	if (!elf_phdata)
728 		goto out;
729 
730 	elf_ppnt = elf_phdata;
731 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
732 		char *elf_interpreter;
733 
734 		if (elf_ppnt->p_type != PT_INTERP)
735 			continue;
736 
737 		/*
738 		 * This is the program interpreter used for shared libraries -
739 		 * for now assume that this is an a.out format binary.
740 		 */
741 		retval = -ENOEXEC;
742 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
743 			goto out_free_ph;
744 
745 		retval = -ENOMEM;
746 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
747 		if (!elf_interpreter)
748 			goto out_free_ph;
749 
750 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
751 				  elf_ppnt->p_offset);
752 		if (retval < 0)
753 			goto out_free_interp;
754 		/* make sure path is NULL terminated */
755 		retval = -ENOEXEC;
756 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
757 			goto out_free_interp;
758 
759 		interpreter = open_exec(elf_interpreter);
760 		kfree(elf_interpreter);
761 		retval = PTR_ERR(interpreter);
762 		if (IS_ERR(interpreter))
763 			goto out_free_ph;
764 
765 		/*
766 		 * If the binary is not readable then enforce mm->dumpable = 0
767 		 * regardless of the interpreter's permissions.
768 		 */
769 		would_dump(bprm, interpreter);
770 
771 		/* Get the exec headers */
772 		retval = elf_read(interpreter, &loc->interp_elf_ex,
773 				  sizeof(loc->interp_elf_ex), 0);
774 		if (retval < 0)
775 			goto out_free_dentry;
776 
777 		break;
778 
779 out_free_interp:
780 		kfree(elf_interpreter);
781 		goto out_free_ph;
782 	}
783 
784 	elf_ppnt = elf_phdata;
785 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
786 		switch (elf_ppnt->p_type) {
787 		case PT_GNU_STACK:
788 			if (elf_ppnt->p_flags & PF_X)
789 				executable_stack = EXSTACK_ENABLE_X;
790 			else
791 				executable_stack = EXSTACK_DISABLE_X;
792 			break;
793 
794 		case PT_LOPROC ... PT_HIPROC:
795 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
796 						  bprm->file, false,
797 						  &arch_state);
798 			if (retval)
799 				goto out_free_dentry;
800 			break;
801 		}
802 
803 	/* Some simple consistency checks for the interpreter */
804 	if (interpreter) {
805 		retval = -ELIBBAD;
806 		/* Not an ELF interpreter */
807 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
808 			goto out_free_dentry;
809 		/* Verify the interpreter has a valid arch */
810 		if (!elf_check_arch(&loc->interp_elf_ex) ||
811 		    elf_check_fdpic(&loc->interp_elf_ex))
812 			goto out_free_dentry;
813 
814 		/* Load the interpreter program headers */
815 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
816 						   interpreter);
817 		if (!interp_elf_phdata)
818 			goto out_free_dentry;
819 
820 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
821 		elf_ppnt = interp_elf_phdata;
822 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
823 			switch (elf_ppnt->p_type) {
824 			case PT_LOPROC ... PT_HIPROC:
825 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
826 							  elf_ppnt, interpreter,
827 							  true, &arch_state);
828 				if (retval)
829 					goto out_free_dentry;
830 				break;
831 			}
832 	}
833 
834 	/*
835 	 * Allow arch code to reject the ELF at this point, whilst it's
836 	 * still possible to return an error to the code that invoked
837 	 * the exec syscall.
838 	 */
839 	retval = arch_check_elf(&loc->elf_ex,
840 				!!interpreter, &loc->interp_elf_ex,
841 				&arch_state);
842 	if (retval)
843 		goto out_free_dentry;
844 
845 	/* Flush all traces of the currently running executable */
846 	retval = flush_old_exec(bprm);
847 	if (retval)
848 		goto out_free_dentry;
849 
850 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
851 	   may depend on the personality.  */
852 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
853 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
854 		current->personality |= READ_IMPLIES_EXEC;
855 
856 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
857 		current->flags |= PF_RANDOMIZE;
858 
859 	setup_new_exec(bprm);
860 	install_exec_creds(bprm);
861 
862 	/* Do this so that we can load the interpreter, if need be.  We will
863 	   change some of these later */
864 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
865 				 executable_stack);
866 	if (retval < 0)
867 		goto out_free_dentry;
868 
869 	elf_bss = 0;
870 	elf_brk = 0;
871 
872 	start_code = ~0UL;
873 	end_code = 0;
874 	start_data = 0;
875 	end_data = 0;
876 
877 	/* Now we do a little grungy work by mmapping the ELF image into
878 	   the correct location in memory. */
879 	for(i = 0, elf_ppnt = elf_phdata;
880 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
881 		int elf_prot, elf_flags;
882 		unsigned long k, vaddr;
883 		unsigned long total_size = 0;
884 
885 		if (elf_ppnt->p_type != PT_LOAD)
886 			continue;
887 
888 		if (unlikely (elf_brk > elf_bss)) {
889 			unsigned long nbyte;
890 
891 			/* There was a PT_LOAD segment with p_memsz > p_filesz
892 			   before this one. Map anonymous pages, if needed,
893 			   and clear the area.  */
894 			retval = set_brk(elf_bss + load_bias,
895 					 elf_brk + load_bias,
896 					 bss_prot);
897 			if (retval)
898 				goto out_free_dentry;
899 			nbyte = ELF_PAGEOFFSET(elf_bss);
900 			if (nbyte) {
901 				nbyte = ELF_MIN_ALIGN - nbyte;
902 				if (nbyte > elf_brk - elf_bss)
903 					nbyte = elf_brk - elf_bss;
904 				if (clear_user((void __user *)elf_bss +
905 							load_bias, nbyte)) {
906 					/*
907 					 * This bss-zeroing can fail if the ELF
908 					 * file specifies odd protections. So
909 					 * we don't check the return value
910 					 */
911 				}
912 			}
913 		}
914 
915 		elf_prot = make_prot(elf_ppnt->p_flags);
916 
917 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
918 
919 		vaddr = elf_ppnt->p_vaddr;
920 		/*
921 		 * If we are loading ET_EXEC or we have already performed
922 		 * the ET_DYN load_addr calculations, proceed normally.
923 		 */
924 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
925 			elf_flags |= MAP_FIXED;
926 		} else if (loc->elf_ex.e_type == ET_DYN) {
927 			/*
928 			 * This logic is run once for the first LOAD Program
929 			 * Header for ET_DYN binaries to calculate the
930 			 * randomization (load_bias) for all the LOAD
931 			 * Program Headers, and to calculate the entire
932 			 * size of the ELF mapping (total_size). (Note that
933 			 * load_addr_set is set to true later once the
934 			 * initial mapping is performed.)
935 			 *
936 			 * There are effectively two types of ET_DYN
937 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
938 			 * and loaders (ET_DYN without INTERP, since they
939 			 * _are_ the ELF interpreter). The loaders must
940 			 * be loaded away from programs since the program
941 			 * may otherwise collide with the loader (especially
942 			 * for ET_EXEC which does not have a randomized
943 			 * position). For example to handle invocations of
944 			 * "./ld.so someprog" to test out a new version of
945 			 * the loader, the subsequent program that the
946 			 * loader loads must avoid the loader itself, so
947 			 * they cannot share the same load range. Sufficient
948 			 * room for the brk must be allocated with the
949 			 * loader as well, since brk must be available with
950 			 * the loader.
951 			 *
952 			 * Therefore, programs are loaded offset from
953 			 * ELF_ET_DYN_BASE and loaders are loaded into the
954 			 * independently randomized mmap region (0 load_bias
955 			 * without MAP_FIXED).
956 			 */
957 			if (interpreter) {
958 				load_bias = ELF_ET_DYN_BASE;
959 				if (current->flags & PF_RANDOMIZE)
960 					load_bias += arch_mmap_rnd();
961 				elf_flags |= MAP_FIXED;
962 			} else
963 				load_bias = 0;
964 
965 			/*
966 			 * Since load_bias is used for all subsequent loading
967 			 * calculations, we must lower it by the first vaddr
968 			 * so that the remaining calculations based on the
969 			 * ELF vaddrs will be correctly offset. The result
970 			 * is then page aligned.
971 			 */
972 			load_bias = ELF_PAGESTART(load_bias - vaddr);
973 
974 			total_size = total_mapping_size(elf_phdata,
975 							loc->elf_ex.e_phnum);
976 			if (!total_size) {
977 				retval = -EINVAL;
978 				goto out_free_dentry;
979 			}
980 		}
981 
982 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
983 				elf_prot, elf_flags, total_size);
984 		if (BAD_ADDR(error)) {
985 			retval = IS_ERR((void *)error) ?
986 				PTR_ERR((void*)error) : -EINVAL;
987 			goto out_free_dentry;
988 		}
989 
990 		if (!load_addr_set) {
991 			load_addr_set = 1;
992 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
993 			if (loc->elf_ex.e_type == ET_DYN) {
994 				load_bias += error -
995 				             ELF_PAGESTART(load_bias + vaddr);
996 				load_addr += load_bias;
997 				reloc_func_desc = load_bias;
998 			}
999 		}
1000 		k = elf_ppnt->p_vaddr;
1001 		if (k < start_code)
1002 			start_code = k;
1003 		if (start_data < k)
1004 			start_data = k;
1005 
1006 		/*
1007 		 * Check to see if the section's size will overflow the
1008 		 * allowed task size. Note that p_filesz must always be
1009 		 * <= p_memsz so it is only necessary to check p_memsz.
1010 		 */
1011 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1012 		    elf_ppnt->p_memsz > TASK_SIZE ||
1013 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1014 			/* set_brk can never work. Avoid overflows. */
1015 			retval = -EINVAL;
1016 			goto out_free_dentry;
1017 		}
1018 
1019 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1020 
1021 		if (k > elf_bss)
1022 			elf_bss = k;
1023 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1024 			end_code = k;
1025 		if (end_data < k)
1026 			end_data = k;
1027 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1028 		if (k > elf_brk) {
1029 			bss_prot = elf_prot;
1030 			elf_brk = k;
1031 		}
1032 	}
1033 
1034 	loc->elf_ex.e_entry += load_bias;
1035 	elf_bss += load_bias;
1036 	elf_brk += load_bias;
1037 	start_code += load_bias;
1038 	end_code += load_bias;
1039 	start_data += load_bias;
1040 	end_data += load_bias;
1041 
1042 	/* Calling set_brk effectively mmaps the pages that we need
1043 	 * for the bss and break sections.  We must do this before
1044 	 * mapping in the interpreter, to make sure it doesn't wind
1045 	 * up getting placed where the bss needs to go.
1046 	 */
1047 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1048 	if (retval)
1049 		goto out_free_dentry;
1050 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1051 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1052 		goto out_free_dentry;
1053 	}
1054 
1055 	if (interpreter) {
1056 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1057 					    interpreter,
1058 					    load_bias, interp_elf_phdata);
1059 		if (!IS_ERR((void *)elf_entry)) {
1060 			/*
1061 			 * load_elf_interp() returns relocation
1062 			 * adjustment
1063 			 */
1064 			interp_load_addr = elf_entry;
1065 			elf_entry += loc->interp_elf_ex.e_entry;
1066 		}
1067 		if (BAD_ADDR(elf_entry)) {
1068 			retval = IS_ERR((void *)elf_entry) ?
1069 					(int)elf_entry : -EINVAL;
1070 			goto out_free_dentry;
1071 		}
1072 		reloc_func_desc = interp_load_addr;
1073 
1074 		allow_write_access(interpreter);
1075 		fput(interpreter);
1076 	} else {
1077 		elf_entry = loc->elf_ex.e_entry;
1078 		if (BAD_ADDR(elf_entry)) {
1079 			retval = -EINVAL;
1080 			goto out_free_dentry;
1081 		}
1082 	}
1083 
1084 	kfree(interp_elf_phdata);
1085 	kfree(elf_phdata);
1086 
1087 	set_binfmt(&elf_format);
1088 
1089 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1090 	retval = arch_setup_additional_pages(bprm, !!interpreter);
1091 	if (retval < 0)
1092 		goto out;
1093 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1094 
1095 	retval = create_elf_tables(bprm, &loc->elf_ex,
1096 			  load_addr, interp_load_addr);
1097 	if (retval < 0)
1098 		goto out;
1099 	current->mm->end_code = end_code;
1100 	current->mm->start_code = start_code;
1101 	current->mm->start_data = start_data;
1102 	current->mm->end_data = end_data;
1103 	current->mm->start_stack = bprm->p;
1104 
1105 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1106 		/*
1107 		 * For architectures with ELF randomization, when executing
1108 		 * a loader directly (i.e. no interpreter listed in ELF
1109 		 * headers), move the brk area out of the mmap region
1110 		 * (since it grows up, and may collide early with the stack
1111 		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1112 		 */
1113 		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1114 		    loc->elf_ex.e_type == ET_DYN && !interpreter)
1115 			current->mm->brk = current->mm->start_brk =
1116 				ELF_ET_DYN_BASE;
1117 
1118 		current->mm->brk = current->mm->start_brk =
1119 			arch_randomize_brk(current->mm);
1120 #ifdef compat_brk_randomized
1121 		current->brk_randomized = 1;
1122 #endif
1123 	}
1124 
1125 	if (current->personality & MMAP_PAGE_ZERO) {
1126 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1127 		   and some applications "depend" upon this behavior.
1128 		   Since we do not have the power to recompile these, we
1129 		   emulate the SVr4 behavior. Sigh. */
1130 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1131 				MAP_FIXED | MAP_PRIVATE, 0);
1132 	}
1133 
1134 	regs = current_pt_regs();
1135 #ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
1146 	ELF_PLAT_INIT(regs, reloc_func_desc);
1147 #endif
1148 
1149 	finalize_exec(bprm);
1150 	start_thread(regs, elf_entry, bprm->p);
1151 	retval = 0;
1152 out:
1153 	kfree(loc);
1154 out_ret:
1155 	return retval;
1156 
1157 	/* error cleanup */
1158 out_free_dentry:
1159 	kfree(interp_elf_phdata);
1160 	allow_write_access(interpreter);
1161 	if (interpreter)
1162 		fput(interpreter);
1163 out_free_ph:
1164 	kfree(elf_phdata);
1165 	goto out;
1166 }
1167 
1168 #ifdef CONFIG_USELIB
1169 /* This is really simpleminded and specialized - we are loading an
1170    a.out library that is given an ELF header. */
1171 static int load_elf_library(struct file *file)
1172 {
1173 	struct elf_phdr *elf_phdata;
1174 	struct elf_phdr *eppnt;
1175 	unsigned long elf_bss, bss, len;
1176 	int retval, error, i, j;
1177 	struct elfhdr elf_ex;
1178 
1179 	error = -ENOEXEC;
1180 	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1181 	if (retval < 0)
1182 		goto out;
1183 
1184 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1185 		goto out;
1186 
1187 	/* First of all, some simple consistency checks */
1188 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1189 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1190 		goto out;
1191 	if (elf_check_fdpic(&elf_ex))
1192 		goto out;
1193 
1194 	/* Now read in all of the header information */
1195 
1196 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1197 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1198 
1199 	error = -ENOMEM;
1200 	elf_phdata = kmalloc(j, GFP_KERNEL);
1201 	if (!elf_phdata)
1202 		goto out;
1203 
1204 	eppnt = elf_phdata;
1205 	error = -ENOEXEC;
1206 	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1207 	if (retval < 0)
1208 		goto out_free_ph;
1209 
1210 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1211 		if ((eppnt + i)->p_type == PT_LOAD)
1212 			j++;
1213 	if (j != 1)
1214 		goto out_free_ph;
1215 
1216 	while (eppnt->p_type != PT_LOAD)
1217 		eppnt++;
1218 
1219 	/* Now use mmap to map the library into memory. */
1220 	error = vm_mmap(file,
1221 			ELF_PAGESTART(eppnt->p_vaddr),
1222 			(eppnt->p_filesz +
1223 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1224 			PROT_READ | PROT_WRITE | PROT_EXEC,
1225 			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1226 			(eppnt->p_offset -
1227 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1228 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1229 		goto out_free_ph;
1230 
1231 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1232 	if (padzero(elf_bss)) {
1233 		error = -EFAULT;
1234 		goto out_free_ph;
1235 	}
1236 
1237 	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1238 	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1239 	if (bss > len) {
1240 		error = vm_brk(len, bss - len);
1241 		if (error)
1242 			goto out_free_ph;
1243 	}
1244 	error = 0;
1245 
1246 out_free_ph:
1247 	kfree(elf_phdata);
1248 out:
1249 	return error;
1250 }
1251 #endif /* #ifdef CONFIG_USELIB */
1252 
1253 #ifdef CONFIG_ELF_CORE
1254 /*
1255  * ELF core dumper
1256  *
1257  * Modelled on fs/exec.c:aout_core_dump()
1258  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1259  */
1260 
1261 /*
1262  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1263  * that are useful for post-mortem analysis are included in every core dump.
1264  * In that way we ensure that the core dump is fully interpretable later
1265  * without matching up the same kernel and hardware config to see what PC values
1266  * meant. These special mappings include - vDSO, vsyscall, and other
1267  * architecture specific mappings
1268  */
1269 static bool always_dump_vma(struct vm_area_struct *vma)
1270 {
1271 	/* Any vsyscall mappings? */
1272 	if (vma == get_gate_vma(vma->vm_mm))
1273 		return true;
1274 
1275 	/*
1276 	 * Assume that all vmas with a .name op should always be dumped.
1277 	 * If this changes, a new vm_ops field can easily be added.
1278 	 */
1279 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1280 		return true;
1281 
1282 	/*
1283 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1284 	 * such as vDSO sections.
1285 	 */
1286 	if (arch_vma_name(vma))
1287 		return true;
1288 
1289 	return false;
1290 }
1291 
1292 /*
1293  * Decide what to dump of a segment, part, all or none.
1294  */
1295 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1296 				   unsigned long mm_flags)
1297 {
1298 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1299 
1300 	/* always dump the vdso and vsyscall sections */
1301 	if (always_dump_vma(vma))
1302 		goto whole;
1303 
1304 	if (vma->vm_flags & VM_DONTDUMP)
1305 		return 0;
1306 
1307 	/* support for DAX */
1308 	if (vma_is_dax(vma)) {
1309 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1310 			goto whole;
1311 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1312 			goto whole;
1313 		return 0;
1314 	}
1315 
1316 	/* Hugetlb memory check */
1317 	if (vma->vm_flags & VM_HUGETLB) {
1318 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1319 			goto whole;
1320 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1321 			goto whole;
1322 		return 0;
1323 	}
1324 
1325 	/* Do not dump I/O mapped devices or special mappings */
1326 	if (vma->vm_flags & VM_IO)
1327 		return 0;
1328 
1329 	/* By default, dump shared memory if mapped from an anonymous file. */
1330 	if (vma->vm_flags & VM_SHARED) {
1331 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1332 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1333 			goto whole;
1334 		return 0;
1335 	}
1336 
1337 	/* Dump segments that have been written to.  */
1338 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1339 		goto whole;
1340 	if (vma->vm_file == NULL)
1341 		return 0;
1342 
1343 	if (FILTER(MAPPED_PRIVATE))
1344 		goto whole;
1345 
1346 	/*
1347 	 * If this looks like the beginning of a DSO or executable mapping,
1348 	 * check for an ELF header.  If we find one, dump the first page to
1349 	 * aid in determining what was mapped here.
1350 	 */
1351 	if (FILTER(ELF_HEADERS) &&
1352 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1353 		u32 __user *header = (u32 __user *) vma->vm_start;
1354 		u32 word;
1355 		mm_segment_t fs = get_fs();
1356 		/*
1357 		 * Doing it this way gets the constant folded by GCC.
1358 		 */
1359 		union {
1360 			u32 cmp;
1361 			char elfmag[SELFMAG];
1362 		} magic;
1363 		BUILD_BUG_ON(SELFMAG != sizeof word);
1364 		magic.elfmag[EI_MAG0] = ELFMAG0;
1365 		magic.elfmag[EI_MAG1] = ELFMAG1;
1366 		magic.elfmag[EI_MAG2] = ELFMAG2;
1367 		magic.elfmag[EI_MAG3] = ELFMAG3;
1368 		/*
1369 		 * Switch to the user "segment" for get_user(),
1370 		 * then put back what elf_core_dump() had in place.
1371 		 */
1372 		set_fs(USER_DS);
1373 		if (unlikely(get_user(word, header)))
1374 			word = 0;
1375 		set_fs(fs);
1376 		if (word == magic.cmp)
1377 			return PAGE_SIZE;
1378 	}
1379 
1380 #undef	FILTER
1381 
1382 	return 0;
1383 
1384 whole:
1385 	return vma->vm_end - vma->vm_start;
1386 }
1387 
/* An ELF note in memory */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, emitted as n_type */
	unsigned int datasz;	/* payload length in bytes, emitted as n_descsz */
	void *data;		/* payload, datasz bytes */
};
1396 
1397 static int notesize(struct memelfnote *en)
1398 {
1399 	int sz;
1400 
1401 	sz = sizeof(struct elf_note);
1402 	sz += roundup(strlen(en->name) + 1, 4);
1403 	sz += roundup(en->datasz, 4);
1404 
1405 	return sz;
1406 }
1407 
1408 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1409 {
1410 	struct elf_note en;
1411 	en.n_namesz = strlen(men->name) + 1;
1412 	en.n_descsz = men->datasz;
1413 	en.n_type = men->type;
1414 
1415 	return dump_emit(cprm, &en, sizeof(en)) &&
1416 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1417 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1418 }
1419 
1420 static void fill_elf_header(struct elfhdr *elf, int segs,
1421 			    u16 machine, u32 flags)
1422 {
1423 	memset(elf, 0, sizeof(*elf));
1424 
1425 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1426 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1427 	elf->e_ident[EI_DATA] = ELF_DATA;
1428 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1429 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1430 
1431 	elf->e_type = ET_CORE;
1432 	elf->e_machine = machine;
1433 	elf->e_version = EV_CURRENT;
1434 	elf->e_phoff = sizeof(struct elfhdr);
1435 	elf->e_flags = flags;
1436 	elf->e_ehsize = sizeof(struct elfhdr);
1437 	elf->e_phentsize = sizeof(struct elf_phdr);
1438 	elf->e_phnum = segs;
1439 }
1440 
1441 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1442 {
1443 	phdr->p_type = PT_NOTE;
1444 	phdr->p_offset = offset;
1445 	phdr->p_vaddr = 0;
1446 	phdr->p_paddr = 0;
1447 	phdr->p_filesz = sz;
1448 	phdr->p_memsz = 0;
1449 	phdr->p_flags = 0;
1450 	phdr->p_align = 0;
1451 }
1452 
1453 static void fill_note(struct memelfnote *note, const char *name, int type,
1454 		unsigned int sz, void *data)
1455 {
1456 	note->name = name;
1457 	note->type = type;
1458 	note->datasz = sz;
1459 	note->data = data;
1460 }
1461 
1462 /*
1463  * fill up all the fields in prstatus from the given task struct, except
1464  * registers which need to be filled up separately.
1465  */
1466 static void fill_prstatus(struct elf_prstatus *prstatus,
1467 		struct task_struct *p, long signr)
1468 {
1469 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1470 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1471 	prstatus->pr_sighold = p->blocked.sig[0];
1472 	rcu_read_lock();
1473 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1474 	rcu_read_unlock();
1475 	prstatus->pr_pid = task_pid_vnr(p);
1476 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1477 	prstatus->pr_sid = task_session_vnr(p);
1478 	if (thread_group_leader(p)) {
1479 		struct task_cputime cputime;
1480 
1481 		/*
1482 		 * This is the record for the group leader.  It shows the
1483 		 * group-wide total, not its individual thread total.
1484 		 */
1485 		thread_group_cputime(p, &cputime);
1486 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1487 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1488 	} else {
1489 		u64 utime, stime;
1490 
1491 		task_cputime(p, &utime, &stime);
1492 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1493 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1494 	}
1495 
1496 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1497 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1498 }
1499 
1500 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1501 		       struct mm_struct *mm)
1502 {
1503 	const struct cred *cred;
1504 	unsigned int i, len;
1505 
1506 	/* first copy the parameters from user space */
1507 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1508 
1509 	len = mm->arg_end - mm->arg_start;
1510 	if (len >= ELF_PRARGSZ)
1511 		len = ELF_PRARGSZ-1;
1512 	if (copy_from_user(&psinfo->pr_psargs,
1513 		           (const char __user *)mm->arg_start, len))
1514 		return -EFAULT;
1515 	for(i = 0; i < len; i++)
1516 		if (psinfo->pr_psargs[i] == 0)
1517 			psinfo->pr_psargs[i] = ' ';
1518 	psinfo->pr_psargs[len] = 0;
1519 
1520 	rcu_read_lock();
1521 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1522 	rcu_read_unlock();
1523 	psinfo->pr_pid = task_pid_vnr(p);
1524 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1525 	psinfo->pr_sid = task_session_vnr(p);
1526 
1527 	i = p->state ? ffz(~p->state) + 1 : 0;
1528 	psinfo->pr_state = i;
1529 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1530 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1531 	psinfo->pr_nice = task_nice(p);
1532 	psinfo->pr_flag = p->flags;
1533 	rcu_read_lock();
1534 	cred = __task_cred(p);
1535 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1536 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1537 	rcu_read_unlock();
1538 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1539 
1540 	return 0;
1541 }
1542 
1543 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1544 {
1545 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1546 	int i = 0;
1547 	do
1548 		i += 2;
1549 	while (auxv[i - 2] != AT_NULL);
1550 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1551 }
1552 
1553 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1554 		const kernel_siginfo_t *siginfo)
1555 {
1556 	mm_segment_t old_fs = get_fs();
1557 	set_fs(KERNEL_DS);
1558 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1559 	set_fs(old_fs);
1560 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1561 }
1562 
1563 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1564 /*
1565  * Format of NT_FILE note:
1566  *
1567  * long count     -- how many files are mapped
1568  * long page_size -- units for file_ofs
1569  * array of [COUNT] elements of
1570  *   long start
1571  *   long end
1572  *   long file_ofs
1573  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1574  */
1575 static int fill_files_note(struct memelfnote *note)
1576 {
1577 	struct vm_area_struct *vma;
1578 	unsigned count, size, names_ofs, remaining, n;
1579 	user_long_t *data;
1580 	user_long_t *start_end_ofs;
1581 	char *name_base, *name_curpos;
1582 
1583 	/* *Estimated* file count and total data size needed */
1584 	count = current->mm->map_count;
1585 	if (count > UINT_MAX / 64)
1586 		return -EINVAL;
1587 	size = count * 64;
1588 
1589 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1590  alloc:
1591 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1592 		return -EINVAL;
1593 	size = round_up(size, PAGE_SIZE);
1594 	data = kvmalloc(size, GFP_KERNEL);
1595 	if (ZERO_OR_NULL_PTR(data))
1596 		return -ENOMEM;
1597 
1598 	start_end_ofs = data + 2;
1599 	name_base = name_curpos = ((char *)data) + names_ofs;
1600 	remaining = size - names_ofs;
1601 	count = 0;
1602 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1603 		struct file *file;
1604 		const char *filename;
1605 
1606 		file = vma->vm_file;
1607 		if (!file)
1608 			continue;
1609 		filename = file_path(file, name_curpos, remaining);
1610 		if (IS_ERR(filename)) {
1611 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1612 				kvfree(data);
1613 				size = size * 5 / 4;
1614 				goto alloc;
1615 			}
1616 			continue;
1617 		}
1618 
1619 		/* file_path() fills at the end, move name down */
1620 		/* n = strlen(filename) + 1: */
1621 		n = (name_curpos + remaining) - filename;
1622 		remaining = filename - name_curpos;
1623 		memmove(name_curpos, filename, n);
1624 		name_curpos += n;
1625 
1626 		*start_end_ofs++ = vma->vm_start;
1627 		*start_end_ofs++ = vma->vm_end;
1628 		*start_end_ofs++ = vma->vm_pgoff;
1629 		count++;
1630 	}
1631 
1632 	/* Now we know exact count of files, can store it */
1633 	data[0] = count;
1634 	data[1] = PAGE_SIZE;
1635 	/*
1636 	 * Count usually is less than current->mm->map_count,
1637 	 * we need to move filenames down.
1638 	 */
1639 	n = current->mm->map_count - count;
1640 	if (n != 0) {
1641 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1642 		memmove(name_base - shift_bytes, name_base,
1643 			name_curpos - name_base);
1644 		name_curpos -= shift_bytes;
1645 	}
1646 
1647 	size = name_curpos - (char *)data;
1648 	fill_note(note, "CORE", NT_FILE, size, data);
1649 	return 0;
1650 }
1651 
1652 #ifdef CORE_DUMP_USE_REGSET
1653 #include <linux/regset.h>
1654 
1655 struct elf_thread_core_info {
1656 	struct elf_thread_core_info *next;
1657 	struct task_struct *task;
1658 	struct elf_prstatus prstatus;
1659 	struct memelfnote notes[0];
1660 };
1661 
1662 struct elf_note_info {
1663 	struct elf_thread_core_info *thread;
1664 	struct memelfnote psinfo;
1665 	struct memelfnote signote;
1666 	struct memelfnote auxv;
1667 	struct memelfnote files;
1668 	user_siginfo_t csigdata;
1669 	size_t size;
1670 	int thread_notes;
1671 };
1672 
1673 /*
1674  * When a regset has a writeback hook, we call it on each thread before
1675  * dumping user memory.  On register window machines, this makes sure the
1676  * user memory backing the register data is up to date before we read it.
1677  */
1678 static void do_thread_regset_writeback(struct task_struct *task,
1679 				       const struct user_regset *regset)
1680 {
1681 	if (regset->writeback)
1682 		regset->writeback(task, regset, 1);
1683 }
1684 
1685 #ifndef PRSTATUS_SIZE
1686 #define PRSTATUS_SIZE(S, R) sizeof(S)
1687 #endif
1688 
1689 #ifndef SET_PR_FPVALID
1690 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1691 #endif
1692 
1693 static int fill_thread_core_info(struct elf_thread_core_info *t,
1694 				 const struct user_regset_view *view,
1695 				 long signr, size_t *total)
1696 {
1697 	unsigned int i;
1698 	unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1699 
1700 	/*
1701 	 * NT_PRSTATUS is the one special case, because the regset data
1702 	 * goes into the pr_reg field inside the note contents, rather
1703 	 * than being the whole note contents.  We fill the reset in here.
1704 	 * We assume that regset 0 is NT_PRSTATUS.
1705 	 */
1706 	fill_prstatus(&t->prstatus, t->task, signr);
1707 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1708 				    &t->prstatus.pr_reg, NULL);
1709 
1710 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1711 		  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1712 	*total += notesize(&t->notes[0]);
1713 
1714 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1715 
1716 	/*
1717 	 * Each other regset might generate a note too.  For each regset
1718 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1719 	 * all zero and we'll know to skip writing it later.
1720 	 */
1721 	for (i = 1; i < view->n; ++i) {
1722 		const struct user_regset *regset = &view->regsets[i];
1723 		do_thread_regset_writeback(t->task, regset);
1724 		if (regset->core_note_type && regset->get &&
1725 		    (!regset->active || regset->active(t->task, regset) > 0)) {
1726 			int ret;
1727 			size_t size = regset_size(t->task, regset);
1728 			void *data = kmalloc(size, GFP_KERNEL);
1729 			if (unlikely(!data))
1730 				return 0;
1731 			ret = regset->get(t->task, regset,
1732 					  0, size, data, NULL);
1733 			if (unlikely(ret))
1734 				kfree(data);
1735 			else {
1736 				if (regset->core_note_type != NT_PRFPREG)
1737 					fill_note(&t->notes[i], "LINUX",
1738 						  regset->core_note_type,
1739 						  size, data);
1740 				else {
1741 					SET_PR_FPVALID(&t->prstatus,
1742 							1, regset0_size);
1743 					fill_note(&t->notes[i], "CORE",
1744 						  NT_PRFPREG, size, data);
1745 				}
1746 				*total += notesize(&t->notes[i]);
1747 			}
1748 		}
1749 	}
1750 
1751 	return 1;
1752 }
1753 
1754 static int fill_note_info(struct elfhdr *elf, int phdrs,
1755 			  struct elf_note_info *info,
1756 			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1757 {
1758 	struct task_struct *dump_task = current;
1759 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1760 	struct elf_thread_core_info *t;
1761 	struct elf_prpsinfo *psinfo;
1762 	struct core_thread *ct;
1763 	unsigned int i;
1764 
1765 	info->size = 0;
1766 	info->thread = NULL;
1767 
1768 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1769 	if (psinfo == NULL) {
1770 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1771 		return 0;
1772 	}
1773 
1774 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1775 
1776 	/*
1777 	 * Figure out how many notes we're going to need for each thread.
1778 	 */
1779 	info->thread_notes = 0;
1780 	for (i = 0; i < view->n; ++i)
1781 		if (view->regsets[i].core_note_type != 0)
1782 			++info->thread_notes;
1783 
1784 	/*
1785 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1786 	 * since it is our one special case.
1787 	 */
1788 	if (unlikely(info->thread_notes == 0) ||
1789 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1790 		WARN_ON(1);
1791 		return 0;
1792 	}
1793 
1794 	/*
1795 	 * Initialize the ELF file header.
1796 	 */
1797 	fill_elf_header(elf, phdrs,
1798 			view->e_machine, view->e_flags);
1799 
1800 	/*
1801 	 * Allocate a structure for each thread.
1802 	 */
1803 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1804 		t = kzalloc(offsetof(struct elf_thread_core_info,
1805 				     notes[info->thread_notes]),
1806 			    GFP_KERNEL);
1807 		if (unlikely(!t))
1808 			return 0;
1809 
1810 		t->task = ct->task;
1811 		if (ct->task == dump_task || !info->thread) {
1812 			t->next = info->thread;
1813 			info->thread = t;
1814 		} else {
1815 			/*
1816 			 * Make sure to keep the original task at
1817 			 * the head of the list.
1818 			 */
1819 			t->next = info->thread->next;
1820 			info->thread->next = t;
1821 		}
1822 	}
1823 
1824 	/*
1825 	 * Now fill in each thread's information.
1826 	 */
1827 	for (t = info->thread; t != NULL; t = t->next)
1828 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1829 			return 0;
1830 
1831 	/*
1832 	 * Fill in the two process-wide notes.
1833 	 */
1834 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1835 	info->size += notesize(&info->psinfo);
1836 
1837 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1838 	info->size += notesize(&info->signote);
1839 
1840 	fill_auxv_note(&info->auxv, current->mm);
1841 	info->size += notesize(&info->auxv);
1842 
1843 	if (fill_files_note(&info->files) == 0)
1844 		info->size += notesize(&info->files);
1845 
1846 	return 1;
1847 }
1848 
1849 static size_t get_note_info_size(struct elf_note_info *info)
1850 {
1851 	return info->size;
1852 }
1853 
1854 /*
1855  * Write all the notes for each thread.  When writing the first thread, the
1856  * process-wide notes are interleaved after the first thread-specific note.
1857  */
1858 static int write_note_info(struct elf_note_info *info,
1859 			   struct coredump_params *cprm)
1860 {
1861 	bool first = true;
1862 	struct elf_thread_core_info *t = info->thread;
1863 
1864 	do {
1865 		int i;
1866 
1867 		if (!writenote(&t->notes[0], cprm))
1868 			return 0;
1869 
1870 		if (first && !writenote(&info->psinfo, cprm))
1871 			return 0;
1872 		if (first && !writenote(&info->signote, cprm))
1873 			return 0;
1874 		if (first && !writenote(&info->auxv, cprm))
1875 			return 0;
1876 		if (first && info->files.data &&
1877 				!writenote(&info->files, cprm))
1878 			return 0;
1879 
1880 		for (i = 1; i < info->thread_notes; ++i)
1881 			if (t->notes[i].data &&
1882 			    !writenote(&t->notes[i], cprm))
1883 				return 0;
1884 
1885 		first = false;
1886 		t = t->next;
1887 	} while (t);
1888 
1889 	return 1;
1890 }
1891 
1892 static void free_note_info(struct elf_note_info *info)
1893 {
1894 	struct elf_thread_core_info *threads = info->thread;
1895 	while (threads) {
1896 		unsigned int i;
1897 		struct elf_thread_core_info *t = threads;
1898 		threads = t->next;
1899 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1900 		for (i = 1; i < info->thread_notes; ++i)
1901 			kfree(t->notes[i].data);
1902 		kfree(t);
1903 	}
1904 	kfree(info->psinfo.data);
1905 	kvfree(info->files.data);
1906 }
1907 
1908 #else
1909 
1910 /* Here is the structure in which status of each thread is captured. */
1911 struct elf_thread_status
1912 {
1913 	struct list_head list;
1914 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1915 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1916 	struct task_struct *thread;
1917 #ifdef ELF_CORE_COPY_XFPREGS
1918 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1919 #endif
1920 	struct memelfnote notes[3];
1921 	int num_notes;
1922 };
1923 
1924 /*
1925  * In order to add the specific thread information for the elf file format,
1926  * we need to keep a linked list of every threads pr_status and then create
1927  * a single section for them in the final core file.
1928  */
1929 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1930 {
1931 	int sz = 0;
1932 	struct task_struct *p = t->thread;
1933 	t->num_notes = 0;
1934 
1935 	fill_prstatus(&t->prstatus, p, signr);
1936 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1937 
1938 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1939 		  &(t->prstatus));
1940 	t->num_notes++;
1941 	sz += notesize(&t->notes[0]);
1942 
1943 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1944 								&t->fpu))) {
1945 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1946 			  &(t->fpu));
1947 		t->num_notes++;
1948 		sz += notesize(&t->notes[1]);
1949 	}
1950 
1951 #ifdef ELF_CORE_COPY_XFPREGS
1952 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1953 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1954 			  sizeof(t->xfpu), &t->xfpu);
1955 		t->num_notes++;
1956 		sz += notesize(&t->notes[2]);
1957 	}
1958 #endif
1959 	return sz;
1960 }
1961 
1962 struct elf_note_info {
1963 	struct memelfnote *notes;
1964 	struct memelfnote *notes_files;
1965 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1966 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1967 	struct list_head thread_list;
1968 	elf_fpregset_t *fpu;
1969 #ifdef ELF_CORE_COPY_XFPREGS
1970 	elf_fpxregset_t *xfpu;
1971 #endif
1972 	user_siginfo_t csigdata;
1973 	int thread_status_size;
1974 	int numnote;
1975 };
1976 
1977 static int elf_note_info_init(struct elf_note_info *info)
1978 {
1979 	memset(info, 0, sizeof(*info));
1980 	INIT_LIST_HEAD(&info->thread_list);
1981 
1982 	/* Allocate space for ELF notes */
1983 	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
1984 	if (!info->notes)
1985 		return 0;
1986 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1987 	if (!info->psinfo)
1988 		return 0;
1989 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1990 	if (!info->prstatus)
1991 		return 0;
1992 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1993 	if (!info->fpu)
1994 		return 0;
1995 #ifdef ELF_CORE_COPY_XFPREGS
1996 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1997 	if (!info->xfpu)
1998 		return 0;
1999 #endif
2000 	return 1;
2001 }
2002 
2003 static int fill_note_info(struct elfhdr *elf, int phdrs,
2004 			  struct elf_note_info *info,
2005 			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2006 {
2007 	struct core_thread *ct;
2008 	struct elf_thread_status *ets;
2009 
2010 	if (!elf_note_info_init(info))
2011 		return 0;
2012 
2013 	for (ct = current->mm->core_state->dumper.next;
2014 					ct; ct = ct->next) {
2015 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2016 		if (!ets)
2017 			return 0;
2018 
2019 		ets->thread = ct->task;
2020 		list_add(&ets->list, &info->thread_list);
2021 	}
2022 
2023 	list_for_each_entry(ets, &info->thread_list, list) {
2024 		int sz;
2025 
2026 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2027 		info->thread_status_size += sz;
2028 	}
2029 	/* now collect the dump for the current */
2030 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2031 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2032 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2033 
2034 	/* Set up header */
2035 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2036 
2037 	/*
2038 	 * Set up the notes in similar form to SVR4 core dumps made
2039 	 * with info from their /proc.
2040 	 */
2041 
2042 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2043 		  sizeof(*info->prstatus), info->prstatus);
2044 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2045 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2046 		  sizeof(*info->psinfo), info->psinfo);
2047 
2048 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2049 	fill_auxv_note(info->notes + 3, current->mm);
2050 	info->numnote = 4;
2051 
2052 	if (fill_files_note(info->notes + info->numnote) == 0) {
2053 		info->notes_files = info->notes + info->numnote;
2054 		info->numnote++;
2055 	}
2056 
2057 	/* Try to dump the FPU. */
2058 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2059 							       info->fpu);
2060 	if (info->prstatus->pr_fpvalid)
2061 		fill_note(info->notes + info->numnote++,
2062 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2063 #ifdef ELF_CORE_COPY_XFPREGS
2064 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2065 		fill_note(info->notes + info->numnote++,
2066 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2067 			  sizeof(*info->xfpu), info->xfpu);
2068 #endif
2069 
2070 	return 1;
2071 }
2072 
2073 static size_t get_note_info_size(struct elf_note_info *info)
2074 {
2075 	int sz = 0;
2076 	int i;
2077 
2078 	for (i = 0; i < info->numnote; i++)
2079 		sz += notesize(info->notes + i);
2080 
2081 	sz += info->thread_status_size;
2082 
2083 	return sz;
2084 }
2085 
2086 static int write_note_info(struct elf_note_info *info,
2087 			   struct coredump_params *cprm)
2088 {
2089 	struct elf_thread_status *ets;
2090 	int i;
2091 
2092 	for (i = 0; i < info->numnote; i++)
2093 		if (!writenote(info->notes + i, cprm))
2094 			return 0;
2095 
2096 	/* write out the thread status notes section */
2097 	list_for_each_entry(ets, &info->thread_list, list) {
2098 		for (i = 0; i < ets->num_notes; i++)
2099 			if (!writenote(&ets->notes[i], cprm))
2100 				return 0;
2101 	}
2102 
2103 	return 1;
2104 }
2105 
2106 static void free_note_info(struct elf_note_info *info)
2107 {
2108 	while (!list_empty(&info->thread_list)) {
2109 		struct list_head *tmp = info->thread_list.next;
2110 		list_del(tmp);
2111 		kfree(list_entry(tmp, struct elf_thread_status, list));
2112 	}
2113 
2114 	/* Free data possibly allocated by fill_files_note(): */
2115 	if (info->notes_files)
2116 		kvfree(info->notes_files->data);
2117 
2118 	kfree(info->prstatus);
2119 	kfree(info->psinfo);
2120 	kfree(info->notes);
2121 	kfree(info->fpu);
2122 #ifdef ELF_CORE_COPY_XFPREGS
2123 	kfree(info->xfpu);
2124 #endif
2125 }
2126 
2127 #endif
2128 
2129 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2130 					struct vm_area_struct *gate_vma)
2131 {
2132 	struct vm_area_struct *ret = tsk->mm->mmap;
2133 
2134 	if (ret)
2135 		return ret;
2136 	return gate_vma;
2137 }
2138 /*
2139  * Helper function for iterating across a vma list.  It ensures that the caller
2140  * will visit `gate_vma' prior to terminating the search.
2141  */
2142 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2143 					struct vm_area_struct *gate_vma)
2144 {
2145 	struct vm_area_struct *ret;
2146 
2147 	ret = this_vma->vm_next;
2148 	if (ret)
2149 		return ret;
2150 	if (this_vma == gate_vma)
2151 		return NULL;
2152 	return gate_vma;
2153 }
2154 
2155 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2156 			     elf_addr_t e_shoff, int segs)
2157 {
2158 	elf->e_shoff = e_shoff;
2159 	elf->e_shentsize = sizeof(*shdr4extnum);
2160 	elf->e_shnum = 1;
2161 	elf->e_shstrndx = SHN_UNDEF;
2162 
2163 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2164 
2165 	shdr4extnum->sh_type = SHT_NULL;
2166 	shdr4extnum->sh_size = elf->e_shnum;
2167 	shdr4extnum->sh_link = elf->e_shstrndx;
2168 	shdr4extnum->sh_info = segs;
2169 }
2170 
2171 /*
2172  * Actual dumper
2173  *
2174  * This is a two-pass process; first we find the offsets of the bits,
2175  * and then they are actually written out.  If we run out of core limit
2176  * we just truncate.
2177  */
2178 static int elf_core_dump(struct coredump_params *cprm)
2179 {
2180 	int has_dumped = 0;
2181 	mm_segment_t fs;
2182 	int segs, i;
2183 	size_t vma_data_size = 0;
2184 	struct vm_area_struct *vma, *gate_vma;
2185 	struct elfhdr *elf = NULL;
2186 	loff_t offset = 0, dataoff;
2187 	struct elf_note_info info = { };
2188 	struct elf_phdr *phdr4note = NULL;
2189 	struct elf_shdr *shdr4extnum = NULL;
2190 	Elf_Half e_phnum;
2191 	elf_addr_t e_shoff;
2192 	elf_addr_t *vma_filesz = NULL;
2193 
2194 	/*
2195 	 * We no longer stop all VM operations.
2196 	 *
2197 	 * This is because those proceses that could possibly change map_count
2198 	 * or the mmap / vma pages are now blocked in do_exit on current
2199 	 * finishing this core dump.
2200 	 *
2201 	 * Only ptrace can touch these memory addresses, but it doesn't change
2202 	 * the map_count or the pages allocated. So no possibility of crashing
2203 	 * exists while dumping the mm->vm_next areas to the core file.
2204 	 */
2205 
2206 	/* alloc memory for large data structures: too large to be on stack */
2207 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2208 	if (!elf)
2209 		goto out;
2210 	/*
2211 	 * The number of segs are recored into ELF header as 16bit value.
2212 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2213 	 */
2214 	segs = current->mm->map_count;
2215 	segs += elf_core_extra_phdrs();
2216 
2217 	gate_vma = get_gate_vma(current->mm);
2218 	if (gate_vma != NULL)
2219 		segs++;
2220 
2221 	/* for notes section */
2222 	segs++;
2223 
2224 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2225 	 * this, kernel supports extended numbering. Have a look at
2226 	 * include/linux/elf.h for further information. */
2227 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2228 
2229 	/*
2230 	 * Collect all the non-memory information about the process for the
2231 	 * notes.  This also sets up the file header.
2232 	 */
2233 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2234 		goto cleanup;
2235 
2236 	has_dumped = 1;
2237 
2238 	fs = get_fs();
2239 	set_fs(KERNEL_DS);
2240 
2241 	offset += sizeof(*elf);				/* Elf header */
2242 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2243 
2244 	/* Write notes phdr entry */
2245 	{
2246 		size_t sz = get_note_info_size(&info);
2247 
2248 		sz += elf_coredump_extra_notes_size();
2249 
2250 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2251 		if (!phdr4note)
2252 			goto end_coredump;
2253 
2254 		fill_elf_note_phdr(phdr4note, sz, offset);
2255 		offset += sz;
2256 	}
2257 
2258 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2259 
2260 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2261 		goto end_coredump;
2262 	vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2263 			      GFP_KERNEL);
2264 	if (ZERO_OR_NULL_PTR(vma_filesz))
2265 		goto end_coredump;
2266 
2267 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2268 			vma = next_vma(vma, gate_vma)) {
2269 		unsigned long dump_size;
2270 
2271 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2272 		vma_filesz[i++] = dump_size;
2273 		vma_data_size += dump_size;
2274 	}
2275 
2276 	offset += vma_data_size;
2277 	offset += elf_core_extra_data_size();
2278 	e_shoff = offset;
2279 
2280 	if (e_phnum == PN_XNUM) {
2281 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2282 		if (!shdr4extnum)
2283 			goto end_coredump;
2284 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2285 	}
2286 
2287 	offset = dataoff;
2288 
2289 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2290 		goto end_coredump;
2291 
2292 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2293 		goto end_coredump;
2294 
2295 	/* Write program headers for segments dump */
2296 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2297 			vma = next_vma(vma, gate_vma)) {
2298 		struct elf_phdr phdr;
2299 
2300 		phdr.p_type = PT_LOAD;
2301 		phdr.p_offset = offset;
2302 		phdr.p_vaddr = vma->vm_start;
2303 		phdr.p_paddr = 0;
2304 		phdr.p_filesz = vma_filesz[i++];
2305 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2306 		offset += phdr.p_filesz;
2307 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2308 		if (vma->vm_flags & VM_WRITE)
2309 			phdr.p_flags |= PF_W;
2310 		if (vma->vm_flags & VM_EXEC)
2311 			phdr.p_flags |= PF_X;
2312 		phdr.p_align = ELF_EXEC_PAGESIZE;
2313 
2314 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2315 			goto end_coredump;
2316 	}
2317 
2318 	if (!elf_core_write_extra_phdrs(cprm, offset))
2319 		goto end_coredump;
2320 
2321  	/* write out the notes section */
2322 	if (!write_note_info(&info, cprm))
2323 		goto end_coredump;
2324 
2325 	if (elf_coredump_extra_notes_write(cprm))
2326 		goto end_coredump;
2327 
2328 	/* Align to page */
2329 	if (!dump_skip(cprm, dataoff - cprm->pos))
2330 		goto end_coredump;
2331 
2332 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2333 			vma = next_vma(vma, gate_vma)) {
2334 		unsigned long addr;
2335 		unsigned long end;
2336 
2337 		end = vma->vm_start + vma_filesz[i++];
2338 
2339 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2340 			struct page *page;
2341 			int stop;
2342 
2343 			page = get_dump_page(addr);
2344 			if (page) {
2345 				void *kaddr = kmap(page);
2346 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2347 				kunmap(page);
2348 				put_page(page);
2349 			} else
2350 				stop = !dump_skip(cprm, PAGE_SIZE);
2351 			if (stop)
2352 				goto end_coredump;
2353 		}
2354 	}
2355 	dump_truncate(cprm);
2356 
2357 	if (!elf_core_write_extra_data(cprm))
2358 		goto end_coredump;
2359 
2360 	if (e_phnum == PN_XNUM) {
2361 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2362 			goto end_coredump;
2363 	}
2364 
2365 end_coredump:
2366 	set_fs(fs);
2367 
2368 cleanup:
2369 	free_note_info(&info);
2370 	kfree(shdr4extnum);
2371 	kvfree(vma_filesz);
2372 	kfree(phdr4note);
2373 	kfree(elf);
2374 out:
2375 	return has_dumped;
2376 }
2377 
2378 #endif		/* CONFIG_ELF_CORE */
2379 
/* Register the ELF binary format handler.  Always returns 0. */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2385 
/* Undo init_elf_binfmt(): unregister the ELF binary format handler. */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2391 
/*
 * Hook the two routines above into the kernel: init_elf_binfmt() is
 * registered through core_initcall() and exit_elf_binfmt() through
 * module_exit().
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2395