xref: /openbmc/linux/fs/binfmt_elf.c (revision 4da722ca19f30f7db250db808d1ab1703607a932)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/sched/task_stack.h>
40 #include <linux/sched/cputime.h>
41 #include <linux/cred.h>
42 #include <linux/dax.h>
43 #include <linux/uaccess.h>
44 #include <asm/param.h>
45 #include <asm/page.h>
46 
/*
 * Default to the native types unless a definition of these names is
 * already in effect at this point.
 */
#if !defined(user_long_t)
# define user_long_t long
#endif

#if !defined(user_siginfo_t)
# define user_siginfo_t siginfo_t
#endif
53 
/* Forward declarations of routines defined further down in this file. */
static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);

/* Without CONFIG_USELIB there is no library loader: use NULL instead. */
#if defined(CONFIG_USELIB)
static int load_elf_library(struct file *);
#else
# define load_elf_library NULL
#endif

/*
 * Same idea for core dumps: when CONFIG_ELF_CORE is off, a NULL handler
 * is supplied so that no dump is even attempted.
 */
#if defined(CONFIG_ELF_CORE)
static int elf_core_dump(struct coredump_params *cprm);
#else
# define elf_core_dump	NULL
#endif
73 
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
# define ELF_MIN_ALIGN	PAGE_SIZE
#else
# define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#endif

/* Defaults to 0 unless a definition is already in effect. */
#if !defined(ELF_CORE_EFLAGS)
# define ELF_CORE_EFLAGS	0
#endif

/* Round an address down to the start of its ELF_MIN_ALIGN-sized page. */
#define ELF_PAGESTART(_v)	((_v) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
/* Offset of an address within its ELF_MIN_ALIGN-sized page. */
#define ELF_PAGEOFFSET(_v)	((_v) & (ELF_MIN_ALIGN - 1))
/* Round an address up to the next ELF_MIN_ALIGN boundary. */
#define ELF_PAGEALIGN(_v)	(((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
87 
88 static struct linux_binfmt elf_format = {
89 	.module		= THIS_MODULE,
90 	.load_binary	= load_elf_binary,
91 	.load_shlib	= load_elf_library,
92 	.core_dump	= elf_core_dump,
93 	.min_coredump	= ELF_EXEC_PAGESIZE,
94 };
95 
/* Non-zero when x, taken as an unsigned long, is at or beyond TASK_SIZE. */
#define BAD_ADDR(x)	((unsigned long)(x) >= TASK_SIZE)
97 
98 static int set_brk(unsigned long start, unsigned long end, int prot)
99 {
100 	start = ELF_PAGEALIGN(start);
101 	end = ELF_PAGEALIGN(end);
102 	if (end > start) {
103 		/*
104 		 * Map the last of the bss segment.
105 		 * If the header is requesting these pages to be
106 		 * executable, honour that (ppc32 needs this).
107 		 */
108 		int error = vm_brk_flags(start, end - start,
109 				prot & PROT_EXEC ? VM_EXEC : 0);
110 		if (error)
111 			return error;
112 	}
113 	current->mm->start_brk = current->mm->brk = end;
114 	return 0;
115 }
116 
117 /* We need to explicitly zero any fractional pages
118    after the data section (i.e. bss).  This would
119    contain the junk from the file that should not
120    be in memory
121  */
122 static int padzero(unsigned long elf_bss)
123 {
124 	unsigned long nbyte;
125 
126 	nbyte = ELF_PAGEOFFSET(elf_bss);
127 	if (nbyte) {
128 		nbyte = ELF_MIN_ALIGN - nbyte;
129 		if (clear_user((void __user *) elf_bss, nbyte))
130 			return -EFAULT;
131 	}
132 	return 0;
133 }
134 
/*
 * Macros that hide the stack growth direction during stack manipulation.
 *
 * STACK_ADD(sp, items)   - sp moved by @items elf_addr_t slots in the
 *                          direction of stack growth
 * STACK_ROUND(sp, items) - sp moved by @items and rounded to a 16-byte
 *                          boundary (up when the stack grows up, down
 *                          otherwise), as an unsigned long
 * STACK_ALLOC(sp, len)   - reserve @len units on the stack, updating @sp
 *                          in place and yielding the lowest address of the
 *                          reserved area
 *
 * Every argument is parenthesized so that compound expressions such as
 * "a + b" expand with the intended precedence.  @sp of STACK_ALLOC must be
 * an lvalue and is evaluated more than once.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
149 
/*
 * AT_BASE_PLATFORM names the "real" hardware/microarchitecture.  An
 * architecture may define ELF_BASE_PLATFORM (in asm/elf.h); its value is
 * then copied to the user stack the same way as AT_PLATFORM.  With no
 * such definition the string is NULL and the entry is left out.
 */
#if !defined(ELF_BASE_PLATFORM)
# define ELF_BASE_PLATFORM NULL
#endif
158 
159 static int
160 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
161 		unsigned long load_addr, unsigned long interp_load_addr)
162 {
163 	unsigned long p = bprm->p;
164 	int argc = bprm->argc;
165 	int envc = bprm->envc;
166 	elf_addr_t __user *sp;
167 	elf_addr_t __user *u_platform;
168 	elf_addr_t __user *u_base_platform;
169 	elf_addr_t __user *u_rand_bytes;
170 	const char *k_platform = ELF_PLATFORM;
171 	const char *k_base_platform = ELF_BASE_PLATFORM;
172 	unsigned char k_rand_bytes[16];
173 	int items;
174 	elf_addr_t *elf_info;
175 	int ei_index = 0;
176 	const struct cred *cred = current_cred();
177 	struct vm_area_struct *vma;
178 
179 	/*
180 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
181 	 * evictions by the processes running on the same package. One
182 	 * thing we can do is to shuffle the initial stack for them.
183 	 */
184 
185 	p = arch_align_stack(p);
186 
187 	/*
188 	 * If this architecture has a platform capability string, copy it
189 	 * to userspace.  In some cases (Sparc), this info is impossible
190 	 * for userspace to get any other way, in others (i386) it is
191 	 * merely difficult.
192 	 */
193 	u_platform = NULL;
194 	if (k_platform) {
195 		size_t len = strlen(k_platform) + 1;
196 
197 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
198 		if (__copy_to_user(u_platform, k_platform, len))
199 			return -EFAULT;
200 	}
201 
202 	/*
203 	 * If this architecture has a "base" platform capability
204 	 * string, copy it to userspace.
205 	 */
206 	u_base_platform = NULL;
207 	if (k_base_platform) {
208 		size_t len = strlen(k_base_platform) + 1;
209 
210 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
211 		if (__copy_to_user(u_base_platform, k_base_platform, len))
212 			return -EFAULT;
213 	}
214 
215 	/*
216 	 * Generate 16 random bytes for userspace PRNG seeding.
217 	 */
218 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
219 	u_rand_bytes = (elf_addr_t __user *)
220 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
221 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
222 		return -EFAULT;
223 
224 	/* Create the ELF interpreter info */
225 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
226 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
227 #define NEW_AUX_ENT(id, val) \
228 	do { \
229 		elf_info[ei_index++] = id; \
230 		elf_info[ei_index++] = val; \
231 	} while (0)
232 
233 #ifdef ARCH_DLINFO
234 	/*
235 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
236 	 * AUXV.
237 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
238 	 * ARCH_DLINFO changes
239 	 */
240 	ARCH_DLINFO;
241 #endif
242 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
243 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
244 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
245 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
246 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
247 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
248 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
249 	NEW_AUX_ENT(AT_FLAGS, 0);
250 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
251 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
252 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
253 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
254 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
255  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
256 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
257 #ifdef ELF_HWCAP2
258 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
259 #endif
260 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
261 	if (k_platform) {
262 		NEW_AUX_ENT(AT_PLATFORM,
263 			    (elf_addr_t)(unsigned long)u_platform);
264 	}
265 	if (k_base_platform) {
266 		NEW_AUX_ENT(AT_BASE_PLATFORM,
267 			    (elf_addr_t)(unsigned long)u_base_platform);
268 	}
269 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
270 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
271 	}
272 #undef NEW_AUX_ENT
273 	/* AT_NULL is zero; clear the rest too */
274 	memset(&elf_info[ei_index], 0,
275 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
276 
277 	/* And advance past the AT_NULL entry.  */
278 	ei_index += 2;
279 
280 	sp = STACK_ADD(p, ei_index);
281 
282 	items = (argc + 1) + (envc + 1) + 1;
283 	bprm->p = STACK_ROUND(sp, items);
284 
285 	/* Point sp at the lowest address on the stack */
286 #ifdef CONFIG_STACK_GROWSUP
287 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
288 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
289 #else
290 	sp = (elf_addr_t __user *)bprm->p;
291 #endif
292 
293 
294 	/*
295 	 * Grow the stack manually; some architectures have a limit on how
296 	 * far ahead a user-space access may be in order to grow the stack.
297 	 */
298 	vma = find_extend_vma(current->mm, bprm->p);
299 	if (!vma)
300 		return -EFAULT;
301 
302 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
303 	if (__put_user(argc, sp++))
304 		return -EFAULT;
305 
306 	/* Populate list of argv pointers back to argv strings. */
307 	p = current->mm->arg_end = current->mm->arg_start;
308 	while (argc-- > 0) {
309 		size_t len;
310 		if (__put_user((elf_addr_t)p, sp++))
311 			return -EFAULT;
312 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 		if (!len || len > MAX_ARG_STRLEN)
314 			return -EINVAL;
315 		p += len;
316 	}
317 	if (__put_user(0, sp++))
318 		return -EFAULT;
319 	current->mm->arg_end = p;
320 
321 	/* Populate list of envp pointers back to envp strings. */
322 	current->mm->env_end = current->mm->env_start = p;
323 	while (envc-- > 0) {
324 		size_t len;
325 		if (__put_user((elf_addr_t)p, sp++))
326 			return -EFAULT;
327 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 		if (!len || len > MAX_ARG_STRLEN)
329 			return -EINVAL;
330 		p += len;
331 	}
332 	if (__put_user(0, sp++))
333 		return -EFAULT;
334 	current->mm->env_end = p;
335 
336 	/* Put the elf_info on the stack in the right place.  */
337 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
338 		return -EFAULT;
339 	return 0;
340 }
341 
342 #ifndef elf_map
343 
344 static unsigned long elf_map(struct file *filep, unsigned long addr,
345 		struct elf_phdr *eppnt, int prot, int type,
346 		unsigned long total_size)
347 {
348 	unsigned long map_addr;
349 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
350 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
351 	addr = ELF_PAGESTART(addr);
352 	size = ELF_PAGEALIGN(size);
353 
354 	/* mmap() will return -EINVAL if given a zero size, but a
355 	 * segment with zero filesize is perfectly valid */
356 	if (!size)
357 		return addr;
358 
359 	/*
360 	* total_size is the size of the ELF (interpreter) image.
361 	* The _first_ mmap needs to know the full size, otherwise
362 	* randomization might put this image into an overlapping
363 	* position with the ELF binary image. (since size < total_size)
364 	* So we first map the 'big' image - and unmap the remainder at
365 	* the end. (which unmap is needed for ELF images with holes.)
366 	*/
367 	if (total_size) {
368 		total_size = ELF_PAGEALIGN(total_size);
369 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
370 		if (!BAD_ADDR(map_addr))
371 			vm_munmap(map_addr+size, total_size-size);
372 	} else
373 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
374 
375 	return(map_addr);
376 }
377 
378 #endif /* !elf_map */
379 
380 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
381 {
382 	int i, first_idx = -1, last_idx = -1;
383 
384 	for (i = 0; i < nr; i++) {
385 		if (cmds[i].p_type == PT_LOAD) {
386 			last_idx = i;
387 			if (first_idx == -1)
388 				first_idx = i;
389 		}
390 	}
391 	if (first_idx == -1)
392 		return 0;
393 
394 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
395 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
396 }
397 
398 /**
399  * load_elf_phdrs() - load ELF program headers
400  * @elf_ex:   ELF header of the binary whose program headers should be loaded
401  * @elf_file: the opened ELF binary file
402  *
403  * Loads ELF program headers from the binary file elf_file, which has the ELF
404  * header pointed to by elf_ex, into a newly allocated array. The caller is
405  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
406  */
407 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
408 				       struct file *elf_file)
409 {
410 	struct elf_phdr *elf_phdata = NULL;
411 	int retval, size, err = -1;
412 
413 	/*
414 	 * If the size of this structure has changed, then punt, since
415 	 * we will be doing the wrong thing.
416 	 */
417 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
418 		goto out;
419 
420 	/* Sanity check the number of program headers... */
421 	if (elf_ex->e_phnum < 1 ||
422 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
423 		goto out;
424 
425 	/* ...and their total size. */
426 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
427 	if (size > ELF_MIN_ALIGN)
428 		goto out;
429 
430 	elf_phdata = kmalloc(size, GFP_KERNEL);
431 	if (!elf_phdata)
432 		goto out;
433 
434 	/* Read in the program headers */
435 	retval = kernel_read(elf_file, elf_ex->e_phoff,
436 			     (char *)elf_phdata, size);
437 	if (retval != size) {
438 		err = (retval < 0) ? retval : -EIO;
439 		goto out;
440 	}
441 
442 	/* Success! */
443 	err = 0;
444 out:
445 	if (err) {
446 		kfree(elf_phdata);
447 		elf_phdata = NULL;
448 	}
449 	return elf_phdata;
450 }
451 
452 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
453 
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * Carries architecture specific data across the loading of an ELF file:
 * from the checking of architecture specific ELF headers up to the point
 * where the ELF load is known to be proceeding (ie. SET_PERSONALITY).
 *
 * This is the dummy version for architectures which need no such state,
 * so it has no members.
 */
struct arch_elf_state {
};

/* Initializer matching the empty dummy structure. */
#define INIT_ARCH_ELF_STATE {}
469 
/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr belongs to the interpreter of the ELF
 *		being loaded, false if it belongs to the ELF itself.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Hook for validating a program header's correctness and/or suitability
 * for the system. It is called once for every program header in the range
 * PT_LOPROC to PT_HIPROC, both for the ELF being loaded and for its
 * interpreter. This dummy version ignores all of its arguments.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code. The dummy always returns zero.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Nothing to check here: let the load proceed. */
	return 0;
}
496 
/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Last chance for architecture code to reject the ELF and make the exec
 * syscall return an error. It runs once every program header that
 * arch_elf_pt_proc needs to see has been checked. This dummy version
 * ignores all of its arguments.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code. The dummy always returns zero.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Nothing to reject here: let the load proceed. */
	return 0;
}
519 
520 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
521 
522 /* This is much more generalized than the library routine read function,
523    so we keep this separate.  Technically the library read function
524    is only provided so that we can read a.out libraries that have
525    an ELF header */
526 
527 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
528 		struct file *interpreter, unsigned long *interp_map_addr,
529 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
530 {
531 	struct elf_phdr *eppnt;
532 	unsigned long load_addr = 0;
533 	int load_addr_set = 0;
534 	unsigned long last_bss = 0, elf_bss = 0;
535 	int bss_prot = 0;
536 	unsigned long error = ~0UL;
537 	unsigned long total_size;
538 	int i;
539 
540 	/* First of all, some simple consistency checks */
541 	if (interp_elf_ex->e_type != ET_EXEC &&
542 	    interp_elf_ex->e_type != ET_DYN)
543 		goto out;
544 	if (!elf_check_arch(interp_elf_ex))
545 		goto out;
546 	if (!interpreter->f_op->mmap)
547 		goto out;
548 
549 	total_size = total_mapping_size(interp_elf_phdata,
550 					interp_elf_ex->e_phnum);
551 	if (!total_size) {
552 		error = -EINVAL;
553 		goto out;
554 	}
555 
556 	eppnt = interp_elf_phdata;
557 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
558 		if (eppnt->p_type == PT_LOAD) {
559 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
560 			int elf_prot = 0;
561 			unsigned long vaddr = 0;
562 			unsigned long k, map_addr;
563 
564 			if (eppnt->p_flags & PF_R)
565 		    		elf_prot = PROT_READ;
566 			if (eppnt->p_flags & PF_W)
567 				elf_prot |= PROT_WRITE;
568 			if (eppnt->p_flags & PF_X)
569 				elf_prot |= PROT_EXEC;
570 			vaddr = eppnt->p_vaddr;
571 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
572 				elf_type |= MAP_FIXED;
573 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
574 				load_addr = -vaddr;
575 
576 			map_addr = elf_map(interpreter, load_addr + vaddr,
577 					eppnt, elf_prot, elf_type, total_size);
578 			total_size = 0;
579 			if (!*interp_map_addr)
580 				*interp_map_addr = map_addr;
581 			error = map_addr;
582 			if (BAD_ADDR(map_addr))
583 				goto out;
584 
585 			if (!load_addr_set &&
586 			    interp_elf_ex->e_type == ET_DYN) {
587 				load_addr = map_addr - ELF_PAGESTART(vaddr);
588 				load_addr_set = 1;
589 			}
590 
591 			/*
592 			 * Check to see if the section's size will overflow the
593 			 * allowed task size. Note that p_filesz must always be
594 			 * <= p_memsize so it's only necessary to check p_memsz.
595 			 */
596 			k = load_addr + eppnt->p_vaddr;
597 			if (BAD_ADDR(k) ||
598 			    eppnt->p_filesz > eppnt->p_memsz ||
599 			    eppnt->p_memsz > TASK_SIZE ||
600 			    TASK_SIZE - eppnt->p_memsz < k) {
601 				error = -ENOMEM;
602 				goto out;
603 			}
604 
605 			/*
606 			 * Find the end of the file mapping for this phdr, and
607 			 * keep track of the largest address we see for this.
608 			 */
609 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
610 			if (k > elf_bss)
611 				elf_bss = k;
612 
613 			/*
614 			 * Do the same thing for the memory mapping - between
615 			 * elf_bss and last_bss is the bss section.
616 			 */
617 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
618 			if (k > last_bss) {
619 				last_bss = k;
620 				bss_prot = elf_prot;
621 			}
622 		}
623 	}
624 
625 	/*
626 	 * Now fill out the bss section: first pad the last page from
627 	 * the file up to the page boundary, and zero it from elf_bss
628 	 * up to the end of the page.
629 	 */
630 	if (padzero(elf_bss)) {
631 		error = -EFAULT;
632 		goto out;
633 	}
634 	/*
635 	 * Next, align both the file and mem bss up to the page size,
636 	 * since this is where elf_bss was just zeroed up to, and where
637 	 * last_bss will end after the vm_brk_flags() below.
638 	 */
639 	elf_bss = ELF_PAGEALIGN(elf_bss);
640 	last_bss = ELF_PAGEALIGN(last_bss);
641 	/* Finally, if there is still more bss to allocate, do it. */
642 	if (last_bss > elf_bss) {
643 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
644 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
645 		if (error)
646 			goto out;
647 	}
648 
649 	error = load_addr;
650 out:
651 	return error;
652 }
653 
654 /*
655  * These are the functions used to load ELF style executables and shared
656  * libraries.  There is no binary dependent code anywhere else.
657  */
658 
659 #ifndef STACK_RND_MASK
660 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
661 #endif
662 
663 static unsigned long randomize_stack_top(unsigned long stack_top)
664 {
665 	unsigned long random_variable = 0;
666 
667 	if ((current->flags & PF_RANDOMIZE) &&
668 		!(current->personality & ADDR_NO_RANDOMIZE)) {
669 		random_variable = get_random_long();
670 		random_variable &= STACK_RND_MASK;
671 		random_variable <<= PAGE_SHIFT;
672 	}
673 #ifdef CONFIG_STACK_GROWSUP
674 	return PAGE_ALIGN(stack_top) + random_variable;
675 #else
676 	return PAGE_ALIGN(stack_top) - random_variable;
677 #endif
678 }
679 
680 static int load_elf_binary(struct linux_binprm *bprm)
681 {
682 	struct file *interpreter = NULL; /* to shut gcc up */
683  	unsigned long load_addr = 0, load_bias = 0;
684 	int load_addr_set = 0;
685 	char * elf_interpreter = NULL;
686 	unsigned long error;
687 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
688 	unsigned long elf_bss, elf_brk;
689 	int bss_prot = 0;
690 	int retval, i;
691 	unsigned long elf_entry;
692 	unsigned long interp_load_addr = 0;
693 	unsigned long start_code, end_code, start_data, end_data;
694 	unsigned long reloc_func_desc __maybe_unused = 0;
695 	int executable_stack = EXSTACK_DEFAULT;
696 	struct pt_regs *regs = current_pt_regs();
697 	struct {
698 		struct elfhdr elf_ex;
699 		struct elfhdr interp_elf_ex;
700 	} *loc;
701 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
702 
703 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
704 	if (!loc) {
705 		retval = -ENOMEM;
706 		goto out_ret;
707 	}
708 
709 	/* Get the exec-header */
710 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
711 
712 	retval = -ENOEXEC;
713 	/* First of all, some simple consistency checks */
714 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 		goto out;
716 
717 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
718 		goto out;
719 	if (!elf_check_arch(&loc->elf_ex))
720 		goto out;
721 	if (!bprm->file->f_op->mmap)
722 		goto out;
723 
724 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
725 	if (!elf_phdata)
726 		goto out;
727 
728 	elf_ppnt = elf_phdata;
729 	elf_bss = 0;
730 	elf_brk = 0;
731 
732 	start_code = ~0UL;
733 	end_code = 0;
734 	start_data = 0;
735 	end_data = 0;
736 
737 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
738 		if (elf_ppnt->p_type == PT_INTERP) {
739 			/* This is the program interpreter used for
740 			 * shared libraries - for now assume that this
741 			 * is an a.out format binary
742 			 */
743 			retval = -ENOEXEC;
744 			if (elf_ppnt->p_filesz > PATH_MAX ||
745 			    elf_ppnt->p_filesz < 2)
746 				goto out_free_ph;
747 
748 			retval = -ENOMEM;
749 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
750 						  GFP_KERNEL);
751 			if (!elf_interpreter)
752 				goto out_free_ph;
753 
754 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
755 					     elf_interpreter,
756 					     elf_ppnt->p_filesz);
757 			if (retval != elf_ppnt->p_filesz) {
758 				if (retval >= 0)
759 					retval = -EIO;
760 				goto out_free_interp;
761 			}
762 			/* make sure path is NULL terminated */
763 			retval = -ENOEXEC;
764 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
765 				goto out_free_interp;
766 
767 			interpreter = open_exec(elf_interpreter);
768 			retval = PTR_ERR(interpreter);
769 			if (IS_ERR(interpreter))
770 				goto out_free_interp;
771 
772 			/*
773 			 * If the binary is not readable then enforce
774 			 * mm->dumpable = 0 regardless of the interpreter's
775 			 * permissions.
776 			 */
777 			would_dump(bprm, interpreter);
778 
779 			/* Get the exec headers */
780 			retval = kernel_read(interpreter, 0,
781 					     (void *)&loc->interp_elf_ex,
782 					     sizeof(loc->interp_elf_ex));
783 			if (retval != sizeof(loc->interp_elf_ex)) {
784 				if (retval >= 0)
785 					retval = -EIO;
786 				goto out_free_dentry;
787 			}
788 
789 			break;
790 		}
791 		elf_ppnt++;
792 	}
793 
794 	elf_ppnt = elf_phdata;
795 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
796 		switch (elf_ppnt->p_type) {
797 		case PT_GNU_STACK:
798 			if (elf_ppnt->p_flags & PF_X)
799 				executable_stack = EXSTACK_ENABLE_X;
800 			else
801 				executable_stack = EXSTACK_DISABLE_X;
802 			break;
803 
804 		case PT_LOPROC ... PT_HIPROC:
805 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
806 						  bprm->file, false,
807 						  &arch_state);
808 			if (retval)
809 				goto out_free_dentry;
810 			break;
811 		}
812 
813 	/* Some simple consistency checks for the interpreter */
814 	if (elf_interpreter) {
815 		retval = -ELIBBAD;
816 		/* Not an ELF interpreter */
817 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
818 			goto out_free_dentry;
819 		/* Verify the interpreter has a valid arch */
820 		if (!elf_check_arch(&loc->interp_elf_ex))
821 			goto out_free_dentry;
822 
823 		/* Load the interpreter program headers */
824 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
825 						   interpreter);
826 		if (!interp_elf_phdata)
827 			goto out_free_dentry;
828 
829 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
830 		elf_ppnt = interp_elf_phdata;
831 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
832 			switch (elf_ppnt->p_type) {
833 			case PT_LOPROC ... PT_HIPROC:
834 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
835 							  elf_ppnt, interpreter,
836 							  true, &arch_state);
837 				if (retval)
838 					goto out_free_dentry;
839 				break;
840 			}
841 	}
842 
843 	/*
844 	 * Allow arch code to reject the ELF at this point, whilst it's
845 	 * still possible to return an error to the code that invoked
846 	 * the exec syscall.
847 	 */
848 	retval = arch_check_elf(&loc->elf_ex,
849 				!!interpreter, &loc->interp_elf_ex,
850 				&arch_state);
851 	if (retval)
852 		goto out_free_dentry;
853 
854 	/* Flush all traces of the currently running executable */
855 	retval = flush_old_exec(bprm);
856 	if (retval)
857 		goto out_free_dentry;
858 
859 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
860 	   may depend on the personality.  */
861 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
862 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
863 		current->personality |= READ_IMPLIES_EXEC;
864 
865 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
866 		current->flags |= PF_RANDOMIZE;
867 
868 	setup_new_exec(bprm);
869 	install_exec_creds(bprm);
870 
871 	/* Do this so that we can load the interpreter, if need be.  We will
872 	   change some of these later */
873 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
874 				 executable_stack);
875 	if (retval < 0)
876 		goto out_free_dentry;
877 
878 	current->mm->start_stack = bprm->p;
879 
880 	/* Now we do a little grungy work by mmapping the ELF image into
881 	   the correct location in memory. */
882 	for(i = 0, elf_ppnt = elf_phdata;
883 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
884 		int elf_prot = 0, elf_flags;
885 		unsigned long k, vaddr;
886 		unsigned long total_size = 0;
887 
888 		if (elf_ppnt->p_type != PT_LOAD)
889 			continue;
890 
891 		if (unlikely (elf_brk > elf_bss)) {
892 			unsigned long nbyte;
893 
894 			/* There was a PT_LOAD segment with p_memsz > p_filesz
895 			   before this one. Map anonymous pages, if needed,
896 			   and clear the area.  */
897 			retval = set_brk(elf_bss + load_bias,
898 					 elf_brk + load_bias,
899 					 bss_prot);
900 			if (retval)
901 				goto out_free_dentry;
902 			nbyte = ELF_PAGEOFFSET(elf_bss);
903 			if (nbyte) {
904 				nbyte = ELF_MIN_ALIGN - nbyte;
905 				if (nbyte > elf_brk - elf_bss)
906 					nbyte = elf_brk - elf_bss;
907 				if (clear_user((void __user *)elf_bss +
908 							load_bias, nbyte)) {
909 					/*
910 					 * This bss-zeroing can fail if the ELF
911 					 * file specifies odd protections. So
912 					 * we don't check the return value
913 					 */
914 				}
915 			}
916 		}
917 
918 		if (elf_ppnt->p_flags & PF_R)
919 			elf_prot |= PROT_READ;
920 		if (elf_ppnt->p_flags & PF_W)
921 			elf_prot |= PROT_WRITE;
922 		if (elf_ppnt->p_flags & PF_X)
923 			elf_prot |= PROT_EXEC;
924 
925 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
926 
927 		vaddr = elf_ppnt->p_vaddr;
928 		/*
929 		 * If we are loading ET_EXEC or we have already performed
930 		 * the ET_DYN load_addr calculations, proceed normally.
931 		 */
932 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
933 			elf_flags |= MAP_FIXED;
934 		} else if (loc->elf_ex.e_type == ET_DYN) {
935 			/*
936 			 * This logic is run once for the first LOAD Program
937 			 * Header for ET_DYN binaries to calculate the
938 			 * randomization (load_bias) for all the LOAD
939 			 * Program Headers, and to calculate the entire
940 			 * size of the ELF mapping (total_size). (Note that
941 			 * load_addr_set is set to true later once the
942 			 * initial mapping is performed.)
943 			 *
944 			 * There are effectively two types of ET_DYN
945 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
946 			 * and loaders (ET_DYN without INTERP, since they
947 			 * _are_ the ELF interpreter). The loaders must
948 			 * be loaded away from programs since the program
949 			 * may otherwise collide with the loader (especially
950 			 * for ET_EXEC which does not have a randomized
951 			 * position). For example to handle invocations of
952 			 * "./ld.so someprog" to test out a new version of
953 			 * the loader, the subsequent program that the
954 			 * loader loads must avoid the loader itself, so
955 			 * they cannot share the same load range. Sufficient
956 			 * room for the brk must be allocated with the
957 			 * loader as well, since brk must be available with
958 			 * the loader.
959 			 *
960 			 * Therefore, programs are loaded offset from
961 			 * ELF_ET_DYN_BASE and loaders are loaded into the
962 			 * independently randomized mmap region (0 load_bias
963 			 * without MAP_FIXED).
964 			 */
965 			if (elf_interpreter) {
966 				load_bias = ELF_ET_DYN_BASE;
967 				if (current->flags & PF_RANDOMIZE)
968 					load_bias += arch_mmap_rnd();
969 				elf_flags |= MAP_FIXED;
970 			} else
971 				load_bias = 0;
972 
973 			/*
974 			 * Since load_bias is used for all subsequent loading
975 			 * calculations, we must lower it by the first vaddr
976 			 * so that the remaining calculations based on the
977 			 * ELF vaddrs will be correctly offset. The result
978 			 * is then page aligned.
979 			 */
980 			load_bias = ELF_PAGESTART(load_bias - vaddr);
981 
982 			total_size = total_mapping_size(elf_phdata,
983 							loc->elf_ex.e_phnum);
984 			if (!total_size) {
985 				retval = -EINVAL;
986 				goto out_free_dentry;
987 			}
988 		}
989 
990 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
991 				elf_prot, elf_flags, total_size);
992 		if (BAD_ADDR(error)) {
993 			retval = IS_ERR((void *)error) ?
994 				PTR_ERR((void*)error) : -EINVAL;
995 			goto out_free_dentry;
996 		}
997 
998 		if (!load_addr_set) {
999 			load_addr_set = 1;
1000 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1001 			if (loc->elf_ex.e_type == ET_DYN) {
1002 				load_bias += error -
1003 				             ELF_PAGESTART(load_bias + vaddr);
1004 				load_addr += load_bias;
1005 				reloc_func_desc = load_bias;
1006 			}
1007 		}
1008 		k = elf_ppnt->p_vaddr;
1009 		if (k < start_code)
1010 			start_code = k;
1011 		if (start_data < k)
1012 			start_data = k;
1013 
1014 		/*
1015 		 * Check to see if the section's size will overflow the
1016 		 * allowed task size. Note that p_filesz must always be
1017 		 * <= p_memsz so it is only necessary to check p_memsz.
1018 		 */
1019 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1020 		    elf_ppnt->p_memsz > TASK_SIZE ||
1021 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1022 			/* set_brk can never work. Avoid overflows. */
1023 			retval = -EINVAL;
1024 			goto out_free_dentry;
1025 		}
1026 
1027 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1028 
1029 		if (k > elf_bss)
1030 			elf_bss = k;
1031 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1032 			end_code = k;
1033 		if (end_data < k)
1034 			end_data = k;
1035 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1036 		if (k > elf_brk) {
1037 			bss_prot = elf_prot;
1038 			elf_brk = k;
1039 		}
1040 	}
1041 
1042 	loc->elf_ex.e_entry += load_bias;
1043 	elf_bss += load_bias;
1044 	elf_brk += load_bias;
1045 	start_code += load_bias;
1046 	end_code += load_bias;
1047 	start_data += load_bias;
1048 	end_data += load_bias;
1049 
1050 	/* Calling set_brk effectively mmaps the pages that we need
1051 	 * for the bss and break sections.  We must do this before
1052 	 * mapping in the interpreter, to make sure it doesn't wind
1053 	 * up getting placed where the bss needs to go.
1054 	 */
1055 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1056 	if (retval)
1057 		goto out_free_dentry;
1058 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1059 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1060 		goto out_free_dentry;
1061 	}
1062 
1063 	if (elf_interpreter) {
1064 		unsigned long interp_map_addr = 0;
1065 
1066 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1067 					    interpreter,
1068 					    &interp_map_addr,
1069 					    load_bias, interp_elf_phdata);
1070 		if (!IS_ERR((void *)elf_entry)) {
1071 			/*
1072 			 * load_elf_interp() returns relocation
1073 			 * adjustment
1074 			 */
1075 			interp_load_addr = elf_entry;
1076 			elf_entry += loc->interp_elf_ex.e_entry;
1077 		}
1078 		if (BAD_ADDR(elf_entry)) {
1079 			retval = IS_ERR((void *)elf_entry) ?
1080 					(int)elf_entry : -EINVAL;
1081 			goto out_free_dentry;
1082 		}
1083 		reloc_func_desc = interp_load_addr;
1084 
1085 		allow_write_access(interpreter);
1086 		fput(interpreter);
1087 		kfree(elf_interpreter);
1088 	} else {
1089 		elf_entry = loc->elf_ex.e_entry;
1090 		if (BAD_ADDR(elf_entry)) {
1091 			retval = -EINVAL;
1092 			goto out_free_dentry;
1093 		}
1094 	}
1095 
1096 	kfree(interp_elf_phdata);
1097 	kfree(elf_phdata);
1098 
1099 	set_binfmt(&elf_format);
1100 
1101 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1102 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1103 	if (retval < 0)
1104 		goto out;
1105 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1106 
1107 	retval = create_elf_tables(bprm, &loc->elf_ex,
1108 			  load_addr, interp_load_addr);
1109 	if (retval < 0)
1110 		goto out;
1111 	/* N.B. passed_fileno might not be initialized? */
1112 	current->mm->end_code = end_code;
1113 	current->mm->start_code = start_code;
1114 	current->mm->start_data = start_data;
1115 	current->mm->end_data = end_data;
1116 	current->mm->start_stack = bprm->p;
1117 
1118 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1119 		current->mm->brk = current->mm->start_brk =
1120 			arch_randomize_brk(current->mm);
1121 #ifdef compat_brk_randomized
1122 		current->brk_randomized = 1;
1123 #endif
1124 	}
1125 
1126 	if (current->personality & MMAP_PAGE_ZERO) {
1127 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1128 		   and some applications "depend" upon this behavior.
1129 		   Since we do not have the power to recompile these, we
1130 		   emulate the SVr4 behavior. Sigh. */
1131 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1132 				MAP_FIXED | MAP_PRIVATE, 0);
1133 	}
1134 
1135 #ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
1146 	ELF_PLAT_INIT(regs, reloc_func_desc);
1147 #endif
1148 
1149 	start_thread(regs, elf_entry, bprm->p);
1150 	retval = 0;
1151 out:
1152 	kfree(loc);
1153 out_ret:
1154 	return retval;
1155 
1156 	/* error cleanup */
1157 out_free_dentry:
1158 	kfree(interp_elf_phdata);
1159 	allow_write_access(interpreter);
1160 	if (interpreter)
1161 		fput(interpreter);
1162 out_free_interp:
1163 	kfree(elf_interpreter);
1164 out_free_ph:
1165 	kfree(elf_phdata);
1166 	goto out;
1167 }
1168 
1169 #ifdef CONFIG_USELIB
1170 /* This is really simpleminded and specialized - we are loading an
1171    a.out library that is given an ELF header. */
1172 static int load_elf_library(struct file *file)
1173 {
1174 	struct elf_phdr *elf_phdata;
1175 	struct elf_phdr *eppnt;
1176 	unsigned long elf_bss, bss, len;
1177 	int retval, error, i, j;
1178 	struct elfhdr elf_ex;
1179 
1180 	error = -ENOEXEC;
1181 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1182 	if (retval != sizeof(elf_ex))
1183 		goto out;
1184 
1185 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1186 		goto out;
1187 
1188 	/* First of all, some simple consistency checks */
1189 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1190 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1191 		goto out;
1192 
1193 	/* Now read in all of the header information */
1194 
1195 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1196 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1197 
1198 	error = -ENOMEM;
1199 	elf_phdata = kmalloc(j, GFP_KERNEL);
1200 	if (!elf_phdata)
1201 		goto out;
1202 
1203 	eppnt = elf_phdata;
1204 	error = -ENOEXEC;
1205 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1206 	if (retval != j)
1207 		goto out_free_ph;
1208 
1209 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1210 		if ((eppnt + i)->p_type == PT_LOAD)
1211 			j++;
1212 	if (j != 1)
1213 		goto out_free_ph;
1214 
1215 	while (eppnt->p_type != PT_LOAD)
1216 		eppnt++;
1217 
1218 	/* Now use mmap to map the library into memory. */
1219 	error = vm_mmap(file,
1220 			ELF_PAGESTART(eppnt->p_vaddr),
1221 			(eppnt->p_filesz +
1222 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1223 			PROT_READ | PROT_WRITE | PROT_EXEC,
1224 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1225 			(eppnt->p_offset -
1226 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1227 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1228 		goto out_free_ph;
1229 
1230 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1231 	if (padzero(elf_bss)) {
1232 		error = -EFAULT;
1233 		goto out_free_ph;
1234 	}
1235 
1236 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1237 			    ELF_MIN_ALIGN - 1);
1238 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1239 	if (bss > len) {
1240 		error = vm_brk(len, bss - len);
1241 		if (error)
1242 			goto out_free_ph;
1243 	}
1244 	error = 0;
1245 
1246 out_free_ph:
1247 	kfree(elf_phdata);
1248 out:
1249 	return error;
1250 }
1251 #endif /* #ifdef CONFIG_USELIB */
1252 
1253 #ifdef CONFIG_ELF_CORE
1254 /*
1255  * ELF core dumper
1256  *
1257  * Modelled on fs/exec.c:aout_core_dump()
1258  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1259  */
1260 
1261 /*
1262  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1263  * that are useful for post-mortem analysis are included in every core dump.
1264  * In that way we ensure that the core dump is fully interpretable later
1265  * without matching up the same kernel and hardware config to see what PC values
1266  * meant. These special mappings include - vDSO, vsyscall, and other
1267  * architecture specific mappings
1268  */
1269 static bool always_dump_vma(struct vm_area_struct *vma)
1270 {
1271 	/* Any vsyscall mappings? */
1272 	if (vma == get_gate_vma(vma->vm_mm))
1273 		return true;
1274 
1275 	/*
1276 	 * Assume that all vmas with a .name op should always be dumped.
1277 	 * If this changes, a new vm_ops field can easily be added.
1278 	 */
1279 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1280 		return true;
1281 
1282 	/*
1283 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1284 	 * such as vDSO sections.
1285 	 */
1286 	if (arch_vma_name(vma))
1287 		return true;
1288 
1289 	return false;
1290 }
1291 
1292 /*
1293  * Decide what to dump of a segment, part, all or none.
1294  */
1295 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1296 				   unsigned long mm_flags)
1297 {
1298 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1299 
1300 	/* always dump the vdso and vsyscall sections */
1301 	if (always_dump_vma(vma))
1302 		goto whole;
1303 
1304 	if (vma->vm_flags & VM_DONTDUMP)
1305 		return 0;
1306 
1307 	/* support for DAX */
1308 	if (vma_is_dax(vma)) {
1309 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1310 			goto whole;
1311 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1312 			goto whole;
1313 		return 0;
1314 	}
1315 
1316 	/* Hugetlb memory check */
1317 	if (vma->vm_flags & VM_HUGETLB) {
1318 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1319 			goto whole;
1320 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1321 			goto whole;
1322 		return 0;
1323 	}
1324 
1325 	/* Do not dump I/O mapped devices or special mappings */
1326 	if (vma->vm_flags & VM_IO)
1327 		return 0;
1328 
1329 	/* By default, dump shared memory if mapped from an anonymous file. */
1330 	if (vma->vm_flags & VM_SHARED) {
1331 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1332 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1333 			goto whole;
1334 		return 0;
1335 	}
1336 
1337 	/* Dump segments that have been written to.  */
1338 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1339 		goto whole;
1340 	if (vma->vm_file == NULL)
1341 		return 0;
1342 
1343 	if (FILTER(MAPPED_PRIVATE))
1344 		goto whole;
1345 
1346 	/*
1347 	 * If this looks like the beginning of a DSO or executable mapping,
1348 	 * check for an ELF header.  If we find one, dump the first page to
1349 	 * aid in determining what was mapped here.
1350 	 */
1351 	if (FILTER(ELF_HEADERS) &&
1352 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1353 		u32 __user *header = (u32 __user *) vma->vm_start;
1354 		u32 word;
1355 		mm_segment_t fs = get_fs();
1356 		/*
1357 		 * Doing it this way gets the constant folded by GCC.
1358 		 */
1359 		union {
1360 			u32 cmp;
1361 			char elfmag[SELFMAG];
1362 		} magic;
1363 		BUILD_BUG_ON(SELFMAG != sizeof word);
1364 		magic.elfmag[EI_MAG0] = ELFMAG0;
1365 		magic.elfmag[EI_MAG1] = ELFMAG1;
1366 		magic.elfmag[EI_MAG2] = ELFMAG2;
1367 		magic.elfmag[EI_MAG3] = ELFMAG3;
1368 		/*
1369 		 * Switch to the user "segment" for get_user(),
1370 		 * then put back what elf_core_dump() had in place.
1371 		 */
1372 		set_fs(USER_DS);
1373 		if (unlikely(get_user(word, header)))
1374 			word = 0;
1375 		set_fs(fs);
1376 		if (word == magic.cmp)
1377 			return PAGE_SIZE;
1378 	}
1379 
1380 #undef	FILTER
1381 
1382 	return 0;
1383 
1384 whole:
1385 	return vma->vm_end - vma->vm_start;
1386 }
1387 
/* In-memory description of one ELF note that is still to be written out. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type (NT_*) */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload */
};
1396 
1397 static int notesize(struct memelfnote *en)
1398 {
1399 	int sz;
1400 
1401 	sz = sizeof(struct elf_note);
1402 	sz += roundup(strlen(en->name) + 1, 4);
1403 	sz += roundup(en->datasz, 4);
1404 
1405 	return sz;
1406 }
1407 
1408 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1409 {
1410 	struct elf_note en;
1411 	en.n_namesz = strlen(men->name) + 1;
1412 	en.n_descsz = men->datasz;
1413 	en.n_type = men->type;
1414 
1415 	return dump_emit(cprm, &en, sizeof(en)) &&
1416 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1417 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1418 }
1419 
1420 static void fill_elf_header(struct elfhdr *elf, int segs,
1421 			    u16 machine, u32 flags)
1422 {
1423 	memset(elf, 0, sizeof(*elf));
1424 
1425 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1426 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1427 	elf->e_ident[EI_DATA] = ELF_DATA;
1428 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1429 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1430 
1431 	elf->e_type = ET_CORE;
1432 	elf->e_machine = machine;
1433 	elf->e_version = EV_CURRENT;
1434 	elf->e_phoff = sizeof(struct elfhdr);
1435 	elf->e_flags = flags;
1436 	elf->e_ehsize = sizeof(struct elfhdr);
1437 	elf->e_phentsize = sizeof(struct elf_phdr);
1438 	elf->e_phnum = segs;
1439 
1440 	return;
1441 }
1442 
1443 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1444 {
1445 	phdr->p_type = PT_NOTE;
1446 	phdr->p_offset = offset;
1447 	phdr->p_vaddr = 0;
1448 	phdr->p_paddr = 0;
1449 	phdr->p_filesz = sz;
1450 	phdr->p_memsz = 0;
1451 	phdr->p_flags = 0;
1452 	phdr->p_align = 0;
1453 	return;
1454 }
1455 
1456 static void fill_note(struct memelfnote *note, const char *name, int type,
1457 		unsigned int sz, void *data)
1458 {
1459 	note->name = name;
1460 	note->type = type;
1461 	note->datasz = sz;
1462 	note->data = data;
1463 	return;
1464 }
1465 
1466 /*
1467  * fill up all the fields in prstatus from the given task struct, except
1468  * registers which need to be filled up separately.
1469  */
1470 static void fill_prstatus(struct elf_prstatus *prstatus,
1471 		struct task_struct *p, long signr)
1472 {
1473 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1474 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1475 	prstatus->pr_sighold = p->blocked.sig[0];
1476 	rcu_read_lock();
1477 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1478 	rcu_read_unlock();
1479 	prstatus->pr_pid = task_pid_vnr(p);
1480 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1481 	prstatus->pr_sid = task_session_vnr(p);
1482 	if (thread_group_leader(p)) {
1483 		struct task_cputime cputime;
1484 
1485 		/*
1486 		 * This is the record for the group leader.  It shows the
1487 		 * group-wide total, not its individual thread total.
1488 		 */
1489 		thread_group_cputime(p, &cputime);
1490 		prstatus->pr_utime = ns_to_timeval(cputime.utime);
1491 		prstatus->pr_stime = ns_to_timeval(cputime.stime);
1492 	} else {
1493 		u64 utime, stime;
1494 
1495 		task_cputime(p, &utime, &stime);
1496 		prstatus->pr_utime = ns_to_timeval(utime);
1497 		prstatus->pr_stime = ns_to_timeval(stime);
1498 	}
1499 
1500 	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1501 	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1502 }
1503 
1504 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1505 		       struct mm_struct *mm)
1506 {
1507 	const struct cred *cred;
1508 	unsigned int i, len;
1509 
1510 	/* first copy the parameters from user space */
1511 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1512 
1513 	len = mm->arg_end - mm->arg_start;
1514 	if (len >= ELF_PRARGSZ)
1515 		len = ELF_PRARGSZ-1;
1516 	if (copy_from_user(&psinfo->pr_psargs,
1517 		           (const char __user *)mm->arg_start, len))
1518 		return -EFAULT;
1519 	for(i = 0; i < len; i++)
1520 		if (psinfo->pr_psargs[i] == 0)
1521 			psinfo->pr_psargs[i] = ' ';
1522 	psinfo->pr_psargs[len] = 0;
1523 
1524 	rcu_read_lock();
1525 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1526 	rcu_read_unlock();
1527 	psinfo->pr_pid = task_pid_vnr(p);
1528 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1529 	psinfo->pr_sid = task_session_vnr(p);
1530 
1531 	i = p->state ? ffz(~p->state) + 1 : 0;
1532 	psinfo->pr_state = i;
1533 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1534 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1535 	psinfo->pr_nice = task_nice(p);
1536 	psinfo->pr_flag = p->flags;
1537 	rcu_read_lock();
1538 	cred = __task_cred(p);
1539 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1540 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1541 	rcu_read_unlock();
1542 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1543 
1544 	return 0;
1545 }
1546 
1547 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1548 {
1549 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1550 	int i = 0;
1551 	do
1552 		i += 2;
1553 	while (auxv[i - 2] != AT_NULL);
1554 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1555 }
1556 
1557 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1558 		const siginfo_t *siginfo)
1559 {
1560 	mm_segment_t old_fs = get_fs();
1561 	set_fs(KERNEL_DS);
1562 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1563 	set_fs(old_fs);
1564 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1565 }
1566 
1567 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1568 /*
1569  * Format of NT_FILE note:
1570  *
1571  * long count     -- how many files are mapped
1572  * long page_size -- units for file_ofs
1573  * array of [COUNT] elements of
1574  *   long start
1575  *   long end
1576  *   long file_ofs
1577  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1578  */
1579 static int fill_files_note(struct memelfnote *note)
1580 {
1581 	struct vm_area_struct *vma;
1582 	unsigned count, size, names_ofs, remaining, n;
1583 	user_long_t *data;
1584 	user_long_t *start_end_ofs;
1585 	char *name_base, *name_curpos;
1586 
1587 	/* *Estimated* file count and total data size needed */
1588 	count = current->mm->map_count;
1589 	size = count * 64;
1590 
1591 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1592  alloc:
1593 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1594 		return -EINVAL;
1595 	size = round_up(size, PAGE_SIZE);
1596 	data = vmalloc(size);
1597 	if (!data)
1598 		return -ENOMEM;
1599 
1600 	start_end_ofs = data + 2;
1601 	name_base = name_curpos = ((char *)data) + names_ofs;
1602 	remaining = size - names_ofs;
1603 	count = 0;
1604 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1605 		struct file *file;
1606 		const char *filename;
1607 
1608 		file = vma->vm_file;
1609 		if (!file)
1610 			continue;
1611 		filename = file_path(file, name_curpos, remaining);
1612 		if (IS_ERR(filename)) {
1613 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1614 				vfree(data);
1615 				size = size * 5 / 4;
1616 				goto alloc;
1617 			}
1618 			continue;
1619 		}
1620 
1621 		/* file_path() fills at the end, move name down */
1622 		/* n = strlen(filename) + 1: */
1623 		n = (name_curpos + remaining) - filename;
1624 		remaining = filename - name_curpos;
1625 		memmove(name_curpos, filename, n);
1626 		name_curpos += n;
1627 
1628 		*start_end_ofs++ = vma->vm_start;
1629 		*start_end_ofs++ = vma->vm_end;
1630 		*start_end_ofs++ = vma->vm_pgoff;
1631 		count++;
1632 	}
1633 
1634 	/* Now we know exact count of files, can store it */
1635 	data[0] = count;
1636 	data[1] = PAGE_SIZE;
1637 	/*
1638 	 * Count usually is less than current->mm->map_count,
1639 	 * we need to move filenames down.
1640 	 */
1641 	n = current->mm->map_count - count;
1642 	if (n != 0) {
1643 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1644 		memmove(name_base - shift_bytes, name_base,
1645 			name_curpos - name_base);
1646 		name_curpos -= shift_bytes;
1647 	}
1648 
1649 	size = name_curpos - (char *)data;
1650 	fill_note(note, "CORE", NT_FILE, size, data);
1651 	return 0;
1652 }
1653 
1654 #ifdef CORE_DUMP_USE_REGSET
1655 #include <linux/regset.h>
1656 
1657 struct elf_thread_core_info {
1658 	struct elf_thread_core_info *next;
1659 	struct task_struct *task;
1660 	struct elf_prstatus prstatus;
1661 	struct memelfnote notes[0];
1662 };
1663 
/*
 * Everything gathered for the note segment of a core dump when regsets
 * are used to collect per-thread register state.
 */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* head of the per-thread list */
	struct memelfnote psinfo;		/* NT_PRPSINFO note */
	struct memelfnote signote;		/* NT_SIGINFO note */
	struct memelfnote auxv;			/* NT_AUXV note */
	struct memelfnote files;		/* NT_FILE note; data unset if absent */
	user_siginfo_t csigdata;		/* storage backing the siginfo note */
	size_t size;				/* running total of all note sizes */
	int thread_notes;			/* number of note slots per thread */
};
1674 
1675 /*
1676  * When a regset has a writeback hook, we call it on each thread before
1677  * dumping user memory.  On register window machines, this makes sure the
1678  * user memory backing the register data is up to date before we read it.
1679  */
1680 static void do_thread_regset_writeback(struct task_struct *task,
1681 				       const struct user_regset *regset)
1682 {
1683 	if (regset->writeback)
1684 		regset->writeback(task, regset, 1);
1685 }
1686 
/*
 * Size of the NT_PRSTATUS payload for prstatus object S.  This fallback
 * ignores R (the size of regset 0) and uses the size of S itself; the
 * #ifndef lets an earlier definition take precedence.
 */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif

/*
 * Store V in the pr_fpvalid field of the prstatus pointed to by S.  This
 * fallback ignores R; the #ifndef lets an earlier definition take
 * precedence.
 */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
1694 
1695 static int fill_thread_core_info(struct elf_thread_core_info *t,
1696 				 const struct user_regset_view *view,
1697 				 long signr, size_t *total)
1698 {
1699 	unsigned int i;
1700 	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1701 
1702 	/*
1703 	 * NT_PRSTATUS is the one special case, because the regset data
1704 	 * goes into the pr_reg field inside the note contents, rather
1705 	 * than being the whole note contents.  We fill the reset in here.
1706 	 * We assume that regset 0 is NT_PRSTATUS.
1707 	 */
1708 	fill_prstatus(&t->prstatus, t->task, signr);
1709 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1710 				    &t->prstatus.pr_reg, NULL);
1711 
1712 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1713 		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1714 	*total += notesize(&t->notes[0]);
1715 
1716 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1717 
1718 	/*
1719 	 * Each other regset might generate a note too.  For each regset
1720 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1721 	 * all zero and we'll know to skip writing it later.
1722 	 */
1723 	for (i = 1; i < view->n; ++i) {
1724 		const struct user_regset *regset = &view->regsets[i];
1725 		do_thread_regset_writeback(t->task, regset);
1726 		if (regset->core_note_type && regset->get &&
1727 		    (!regset->active || regset->active(t->task, regset))) {
1728 			int ret;
1729 			size_t size = regset->n * regset->size;
1730 			void *data = kmalloc(size, GFP_KERNEL);
1731 			if (unlikely(!data))
1732 				return 0;
1733 			ret = regset->get(t->task, regset,
1734 					  0, size, data, NULL);
1735 			if (unlikely(ret))
1736 				kfree(data);
1737 			else {
1738 				if (regset->core_note_type != NT_PRFPREG)
1739 					fill_note(&t->notes[i], "LINUX",
1740 						  regset->core_note_type,
1741 						  size, data);
1742 				else {
1743 					SET_PR_FPVALID(&t->prstatus,
1744 							1, regset_size);
1745 					fill_note(&t->notes[i], "CORE",
1746 						  NT_PRFPREG, size, data);
1747 				}
1748 				*total += notesize(&t->notes[i]);
1749 			}
1750 		}
1751 	}
1752 
1753 	return 1;
1754 }
1755 
1756 static int fill_note_info(struct elfhdr *elf, int phdrs,
1757 			  struct elf_note_info *info,
1758 			  const siginfo_t *siginfo, struct pt_regs *regs)
1759 {
1760 	struct task_struct *dump_task = current;
1761 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1762 	struct elf_thread_core_info *t;
1763 	struct elf_prpsinfo *psinfo;
1764 	struct core_thread *ct;
1765 	unsigned int i;
1766 
1767 	info->size = 0;
1768 	info->thread = NULL;
1769 
1770 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1771 	if (psinfo == NULL) {
1772 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1773 		return 0;
1774 	}
1775 
1776 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1777 
1778 	/*
1779 	 * Figure out how many notes we're going to need for each thread.
1780 	 */
1781 	info->thread_notes = 0;
1782 	for (i = 0; i < view->n; ++i)
1783 		if (view->regsets[i].core_note_type != 0)
1784 			++info->thread_notes;
1785 
1786 	/*
1787 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1788 	 * since it is our one special case.
1789 	 */
1790 	if (unlikely(info->thread_notes == 0) ||
1791 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1792 		WARN_ON(1);
1793 		return 0;
1794 	}
1795 
1796 	/*
1797 	 * Initialize the ELF file header.
1798 	 */
1799 	fill_elf_header(elf, phdrs,
1800 			view->e_machine, view->e_flags);
1801 
1802 	/*
1803 	 * Allocate a structure for each thread.
1804 	 */
1805 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1806 		t = kzalloc(offsetof(struct elf_thread_core_info,
1807 				     notes[info->thread_notes]),
1808 			    GFP_KERNEL);
1809 		if (unlikely(!t))
1810 			return 0;
1811 
1812 		t->task = ct->task;
1813 		if (ct->task == dump_task || !info->thread) {
1814 			t->next = info->thread;
1815 			info->thread = t;
1816 		} else {
1817 			/*
1818 			 * Make sure to keep the original task at
1819 			 * the head of the list.
1820 			 */
1821 			t->next = info->thread->next;
1822 			info->thread->next = t;
1823 		}
1824 	}
1825 
1826 	/*
1827 	 * Now fill in each thread's information.
1828 	 */
1829 	for (t = info->thread; t != NULL; t = t->next)
1830 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1831 			return 0;
1832 
1833 	/*
1834 	 * Fill in the two process-wide notes.
1835 	 */
1836 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1837 	info->size += notesize(&info->psinfo);
1838 
1839 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1840 	info->size += notesize(&info->signote);
1841 
1842 	fill_auxv_note(&info->auxv, current->mm);
1843 	info->size += notesize(&info->auxv);
1844 
1845 	if (fill_files_note(&info->files) == 0)
1846 		info->size += notesize(&info->files);
1847 
1848 	return 1;
1849 }
1850 
1851 static size_t get_note_info_size(struct elf_note_info *info)
1852 {
1853 	return info->size;
1854 }
1855 
1856 /*
1857  * Write all the notes for each thread.  When writing the first thread, the
1858  * process-wide notes are interleaved after the first thread-specific note.
1859  */
1860 static int write_note_info(struct elf_note_info *info,
1861 			   struct coredump_params *cprm)
1862 {
1863 	bool first = true;
1864 	struct elf_thread_core_info *t = info->thread;
1865 
1866 	do {
1867 		int i;
1868 
1869 		if (!writenote(&t->notes[0], cprm))
1870 			return 0;
1871 
1872 		if (first && !writenote(&info->psinfo, cprm))
1873 			return 0;
1874 		if (first && !writenote(&info->signote, cprm))
1875 			return 0;
1876 		if (first && !writenote(&info->auxv, cprm))
1877 			return 0;
1878 		if (first && info->files.data &&
1879 				!writenote(&info->files, cprm))
1880 			return 0;
1881 
1882 		for (i = 1; i < info->thread_notes; ++i)
1883 			if (t->notes[i].data &&
1884 			    !writenote(&t->notes[i], cprm))
1885 				return 0;
1886 
1887 		first = false;
1888 		t = t->next;
1889 	} while (t);
1890 
1891 	return 1;
1892 }
1893 
1894 static void free_note_info(struct elf_note_info *info)
1895 {
1896 	struct elf_thread_core_info *threads = info->thread;
1897 	while (threads) {
1898 		unsigned int i;
1899 		struct elf_thread_core_info *t = threads;
1900 		threads = t->next;
1901 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1902 		for (i = 1; i < info->thread_notes; ++i)
1903 			kfree(t->notes[i].data);
1904 		kfree(t);
1905 	}
1906 	kfree(info->psinfo.data);
1907 	vfree(info->files.data);
1908 }
1909 
1910 #else
1911 
1912 /* Here is the structure in which status of each thread is captured. */
1913 struct elf_thread_status
1914 {
1915 	struct list_head list;
1916 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1917 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1918 	struct task_struct *thread;
1919 #ifdef ELF_CORE_COPY_XFPREGS
1920 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1921 #endif
1922 	struct memelfnote notes[3];
1923 	int num_notes;
1924 };
1925 
1926 /*
1927  * In order to add the specific thread information for the elf file format,
1928  * we need to keep a linked list of every threads pr_status and then create
1929  * a single section for them in the final core file.
1930  */
1931 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1932 {
1933 	int sz = 0;
1934 	struct task_struct *p = t->thread;
1935 	t->num_notes = 0;
1936 
1937 	fill_prstatus(&t->prstatus, p, signr);
1938 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1939 
1940 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1941 		  &(t->prstatus));
1942 	t->num_notes++;
1943 	sz += notesize(&t->notes[0]);
1944 
1945 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1946 								&t->fpu))) {
1947 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1948 			  &(t->fpu));
1949 		t->num_notes++;
1950 		sz += notesize(&t->notes[1]);
1951 	}
1952 
1953 #ifdef ELF_CORE_COPY_XFPREGS
1954 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1955 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1956 			  sizeof(t->xfpu), &t->xfpu);
1957 		t->num_notes++;
1958 		sz += notesize(&t->notes[2]);
1959 	}
1960 #endif
1961 	return sz;
1962 }
1963 
/*
 * Everything gathered for the note segment of a core dump when regsets
 * are not used.  The pointer members are allocated by
 * elf_note_info_init().
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of process-wide notes */
	struct memelfnote *notes_files;	/* the NT_FILE entry in notes, if built */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* elf_thread_status of the other threads */
	elf_fpregset_t *fpu;		/* NT_PRFPREG payload */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE payload */
#endif
	user_siginfo_t csigdata;	/* storage backing the siginfo note */
	int thread_status_size;		/* total note size of the listed threads */
	int numnote;			/* number of entries used in notes */
};
1978 
1979 static int elf_note_info_init(struct elf_note_info *info)
1980 {
1981 	memset(info, 0, sizeof(*info));
1982 	INIT_LIST_HEAD(&info->thread_list);
1983 
1984 	/* Allocate space for ELF notes */
1985 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1986 	if (!info->notes)
1987 		return 0;
1988 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1989 	if (!info->psinfo)
1990 		return 0;
1991 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1992 	if (!info->prstatus)
1993 		return 0;
1994 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1995 	if (!info->fpu)
1996 		return 0;
1997 #ifdef ELF_CORE_COPY_XFPREGS
1998 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1999 	if (!info->xfpu)
2000 		return 0;
2001 #endif
2002 	return 1;
2003 }
2004 
2005 static int fill_note_info(struct elfhdr *elf, int phdrs,
2006 			  struct elf_note_info *info,
2007 			  const siginfo_t *siginfo, struct pt_regs *regs)
2008 {
2009 	struct list_head *t;
2010 	struct core_thread *ct;
2011 	struct elf_thread_status *ets;
2012 
2013 	if (!elf_note_info_init(info))
2014 		return 0;
2015 
2016 	for (ct = current->mm->core_state->dumper.next;
2017 					ct; ct = ct->next) {
2018 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2019 		if (!ets)
2020 			return 0;
2021 
2022 		ets->thread = ct->task;
2023 		list_add(&ets->list, &info->thread_list);
2024 	}
2025 
2026 	list_for_each(t, &info->thread_list) {
2027 		int sz;
2028 
2029 		ets = list_entry(t, struct elf_thread_status, list);
2030 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2031 		info->thread_status_size += sz;
2032 	}
2033 	/* now collect the dump for the current */
2034 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2035 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2036 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2037 
2038 	/* Set up header */
2039 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2040 
2041 	/*
2042 	 * Set up the notes in similar form to SVR4 core dumps made
2043 	 * with info from their /proc.
2044 	 */
2045 
2046 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2047 		  sizeof(*info->prstatus), info->prstatus);
2048 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2049 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2050 		  sizeof(*info->psinfo), info->psinfo);
2051 
2052 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2053 	fill_auxv_note(info->notes + 3, current->mm);
2054 	info->numnote = 4;
2055 
2056 	if (fill_files_note(info->notes + info->numnote) == 0) {
2057 		info->notes_files = info->notes + info->numnote;
2058 		info->numnote++;
2059 	}
2060 
2061 	/* Try to dump the FPU. */
2062 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2063 							       info->fpu);
2064 	if (info->prstatus->pr_fpvalid)
2065 		fill_note(info->notes + info->numnote++,
2066 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2067 #ifdef ELF_CORE_COPY_XFPREGS
2068 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2069 		fill_note(info->notes + info->numnote++,
2070 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2071 			  sizeof(*info->xfpu), info->xfpu);
2072 #endif
2073 
2074 	return 1;
2075 }
2076 
2077 static size_t get_note_info_size(struct elf_note_info *info)
2078 {
2079 	int sz = 0;
2080 	int i;
2081 
2082 	for (i = 0; i < info->numnote; i++)
2083 		sz += notesize(info->notes + i);
2084 
2085 	sz += info->thread_status_size;
2086 
2087 	return sz;
2088 }
2089 
2090 static int write_note_info(struct elf_note_info *info,
2091 			   struct coredump_params *cprm)
2092 {
2093 	int i;
2094 	struct list_head *t;
2095 
2096 	for (i = 0; i < info->numnote; i++)
2097 		if (!writenote(info->notes + i, cprm))
2098 			return 0;
2099 
2100 	/* write out the thread status notes section */
2101 	list_for_each(t, &info->thread_list) {
2102 		struct elf_thread_status *tmp =
2103 				list_entry(t, struct elf_thread_status, list);
2104 
2105 		for (i = 0; i < tmp->num_notes; i++)
2106 			if (!writenote(&tmp->notes[i], cprm))
2107 				return 0;
2108 	}
2109 
2110 	return 1;
2111 }
2112 
2113 static void free_note_info(struct elf_note_info *info)
2114 {
2115 	while (!list_empty(&info->thread_list)) {
2116 		struct list_head *tmp = info->thread_list.next;
2117 		list_del(tmp);
2118 		kfree(list_entry(tmp, struct elf_thread_status, list));
2119 	}
2120 
2121 	/* Free data possibly allocated by fill_files_note(): */
2122 	if (info->notes_files)
2123 		vfree(info->notes_files->data);
2124 
2125 	kfree(info->prstatus);
2126 	kfree(info->psinfo);
2127 	kfree(info->notes);
2128 	kfree(info->fpu);
2129 #ifdef ELF_CORE_COPY_XFPREGS
2130 	kfree(info->xfpu);
2131 #endif
2132 }
2133 
2134 #endif
2135 
2136 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2137 					struct vm_area_struct *gate_vma)
2138 {
2139 	struct vm_area_struct *ret = tsk->mm->mmap;
2140 
2141 	if (ret)
2142 		return ret;
2143 	return gate_vma;
2144 }
2145 /*
2146  * Helper function for iterating across a vma list.  It ensures that the caller
2147  * will visit `gate_vma' prior to terminating the search.
2148  */
2149 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2150 					struct vm_area_struct *gate_vma)
2151 {
2152 	struct vm_area_struct *ret;
2153 
2154 	ret = this_vma->vm_next;
2155 	if (ret)
2156 		return ret;
2157 	if (this_vma == gate_vma)
2158 		return NULL;
2159 	return gate_vma;
2160 }
2161 
2162 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2163 			     elf_addr_t e_shoff, int segs)
2164 {
2165 	elf->e_shoff = e_shoff;
2166 	elf->e_shentsize = sizeof(*shdr4extnum);
2167 	elf->e_shnum = 1;
2168 	elf->e_shstrndx = SHN_UNDEF;
2169 
2170 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2171 
2172 	shdr4extnum->sh_type = SHT_NULL;
2173 	shdr4extnum->sh_size = elf->e_shnum;
2174 	shdr4extnum->sh_link = elf->e_shstrndx;
2175 	shdr4extnum->sh_info = segs;
2176 }
2177 
2178 /*
2179  * Actual dumper
2180  *
2181  * This is a two-pass process; first we find the offsets of the bits,
2182  * and then they are actually written out.  If we run out of core limit
2183  * we just truncate.
2184  */
2185 static int elf_core_dump(struct coredump_params *cprm)
2186 {
2187 	int has_dumped = 0;
2188 	mm_segment_t fs;
2189 	int segs, i;
2190 	size_t vma_data_size = 0;
2191 	struct vm_area_struct *vma, *gate_vma;
2192 	struct elfhdr *elf = NULL;
2193 	loff_t offset = 0, dataoff;
2194 	struct elf_note_info info = { };
2195 	struct elf_phdr *phdr4note = NULL;
2196 	struct elf_shdr *shdr4extnum = NULL;
2197 	Elf_Half e_phnum;
2198 	elf_addr_t e_shoff;
2199 	elf_addr_t *vma_filesz = NULL;
2200 
2201 	/*
2202 	 * We no longer stop all VM operations.
2203 	 *
2204 	 * This is because those proceses that could possibly change map_count
2205 	 * or the mmap / vma pages are now blocked in do_exit on current
2206 	 * finishing this core dump.
2207 	 *
2208 	 * Only ptrace can touch these memory addresses, but it doesn't change
2209 	 * the map_count or the pages allocated. So no possibility of crashing
2210 	 * exists while dumping the mm->vm_next areas to the core file.
2211 	 */
2212 
2213 	/* alloc memory for large data structures: too large to be on stack */
2214 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2215 	if (!elf)
2216 		goto out;
2217 	/*
2218 	 * The number of segs are recored into ELF header as 16bit value.
2219 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2220 	 */
2221 	segs = current->mm->map_count;
2222 	segs += elf_core_extra_phdrs();
2223 
2224 	gate_vma = get_gate_vma(current->mm);
2225 	if (gate_vma != NULL)
2226 		segs++;
2227 
2228 	/* for notes section */
2229 	segs++;
2230 
2231 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2232 	 * this, kernel supports extended numbering. Have a look at
2233 	 * include/linux/elf.h for further information. */
2234 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2235 
2236 	/*
2237 	 * Collect all the non-memory information about the process for the
2238 	 * notes.  This also sets up the file header.
2239 	 */
2240 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2241 		goto cleanup;
2242 
2243 	has_dumped = 1;
2244 
2245 	fs = get_fs();
2246 	set_fs(KERNEL_DS);
2247 
2248 	offset += sizeof(*elf);				/* Elf header */
2249 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2250 
2251 	/* Write notes phdr entry */
2252 	{
2253 		size_t sz = get_note_info_size(&info);
2254 
2255 		sz += elf_coredump_extra_notes_size();
2256 
2257 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2258 		if (!phdr4note)
2259 			goto end_coredump;
2260 
2261 		fill_elf_note_phdr(phdr4note, sz, offset);
2262 		offset += sz;
2263 	}
2264 
2265 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2266 
2267 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2268 		goto end_coredump;
2269 	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2270 	if (!vma_filesz)
2271 		goto end_coredump;
2272 
2273 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2274 			vma = next_vma(vma, gate_vma)) {
2275 		unsigned long dump_size;
2276 
2277 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2278 		vma_filesz[i++] = dump_size;
2279 		vma_data_size += dump_size;
2280 	}
2281 
2282 	offset += vma_data_size;
2283 	offset += elf_core_extra_data_size();
2284 	e_shoff = offset;
2285 
2286 	if (e_phnum == PN_XNUM) {
2287 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2288 		if (!shdr4extnum)
2289 			goto end_coredump;
2290 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2291 	}
2292 
2293 	offset = dataoff;
2294 
2295 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2296 		goto end_coredump;
2297 
2298 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2299 		goto end_coredump;
2300 
2301 	/* Write program headers for segments dump */
2302 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2303 			vma = next_vma(vma, gate_vma)) {
2304 		struct elf_phdr phdr;
2305 
2306 		phdr.p_type = PT_LOAD;
2307 		phdr.p_offset = offset;
2308 		phdr.p_vaddr = vma->vm_start;
2309 		phdr.p_paddr = 0;
2310 		phdr.p_filesz = vma_filesz[i++];
2311 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2312 		offset += phdr.p_filesz;
2313 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2314 		if (vma->vm_flags & VM_WRITE)
2315 			phdr.p_flags |= PF_W;
2316 		if (vma->vm_flags & VM_EXEC)
2317 			phdr.p_flags |= PF_X;
2318 		phdr.p_align = ELF_EXEC_PAGESIZE;
2319 
2320 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2321 			goto end_coredump;
2322 	}
2323 
2324 	if (!elf_core_write_extra_phdrs(cprm, offset))
2325 		goto end_coredump;
2326 
2327  	/* write out the notes section */
2328 	if (!write_note_info(&info, cprm))
2329 		goto end_coredump;
2330 
2331 	if (elf_coredump_extra_notes_write(cprm))
2332 		goto end_coredump;
2333 
2334 	/* Align to page */
2335 	if (!dump_skip(cprm, dataoff - cprm->pos))
2336 		goto end_coredump;
2337 
2338 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2339 			vma = next_vma(vma, gate_vma)) {
2340 		unsigned long addr;
2341 		unsigned long end;
2342 
2343 		end = vma->vm_start + vma_filesz[i++];
2344 
2345 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2346 			struct page *page;
2347 			int stop;
2348 
2349 			page = get_dump_page(addr);
2350 			if (page) {
2351 				void *kaddr = kmap(page);
2352 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2353 				kunmap(page);
2354 				put_page(page);
2355 			} else
2356 				stop = !dump_skip(cprm, PAGE_SIZE);
2357 			if (stop)
2358 				goto end_coredump;
2359 		}
2360 	}
2361 	dump_truncate(cprm);
2362 
2363 	if (!elf_core_write_extra_data(cprm))
2364 		goto end_coredump;
2365 
2366 	if (e_phnum == PN_XNUM) {
2367 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2368 			goto end_coredump;
2369 	}
2370 
2371 end_coredump:
2372 	set_fs(fs);
2373 
2374 cleanup:
2375 	free_note_info(&info);
2376 	kfree(shdr4extnum);
2377 	vfree(vma_filesz);
2378 	kfree(phdr4note);
2379 	kfree(elf);
2380 out:
2381 	return has_dumped;
2382 }
2383 
2384 #endif		/* CONFIG_ELF_CORE */
2385 
2386 static int __init init_elf_binfmt(void)
2387 {
2388 	register_binfmt(&elf_format);
2389 	return 0;
2390 }
2391 
2392 static void __exit exit_elf_binfmt(void)
2393 {
2394 	/* Remove the COFF and ELF loaders. */
2395 	unregister_binfmt(&elf_format);
2396 }
2397 
/*
 * Hook the handler into start-up and teardown: init_elf_binfmt() is run as
 * a core initcall, exit_elf_binfmt() is the module exit routine.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2401