xref: /openbmc/linux/fs/binfmt_elf.c (revision 6aa7de05)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/sched/task_stack.h>
40 #include <linux/sched/cputime.h>
41 #include <linux/cred.h>
42 #include <linux/dax.h>
43 #include <linux/uaccess.h>
44 #include <asm/param.h>
45 #include <asm/page.h>
46 
47 #ifndef user_long_t
48 #define user_long_t long
49 #endif
50 #ifndef user_siginfo_t
51 #define user_siginfo_t siginfo_t
52 #endif
53 
54 static int load_elf_binary(struct linux_binprm *bprm);
55 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
56 				int, int, unsigned long);
57 
58 #ifdef CONFIG_USELIB
59 static int load_elf_library(struct file *);
60 #else
61 #define load_elf_library NULL
62 #endif
63 
64 /*
65  * If we don't support core dumping, then supply a NULL so we
66  * don't even try.
67  */
68 #ifdef CONFIG_ELF_CORE
69 static int elf_core_dump(struct coredump_params *cprm);
70 #else
71 #define elf_core_dump	NULL
72 #endif
73 
74 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
75 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
76 #else
77 #define ELF_MIN_ALIGN	PAGE_SIZE
78 #endif
79 
80 #ifndef ELF_CORE_EFLAGS
81 #define ELF_CORE_EFLAGS	0
82 #endif
83 
/*
 * Address helpers working in units of ELF_MIN_ALIGN (defined above as the
 * larger of ELF_EXEC_PAGESIZE and PAGE_SIZE):
 *   ELF_PAGESTART  - clear the low bits, i.e. round down to a boundary
 *   ELF_PAGEOFFSET - keep only the low bits, i.e. offset within the unit
 *   ELF_PAGEALIGN  - round up to the next boundary
 * NOTE(review): the masks assume ELF_MIN_ALIGN is a power of two - confirm
 * for every architecture that overrides ELF_EXEC_PAGESIZE.
 */
84 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
85 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
86 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
87 
/*
 * Handler table for the ELF binary format: the entry points used to load
 * an executable, load a shared library and write a core dump.
 * load_elf_library and elf_core_dump are #defined to NULL earlier in this
 * file when CONFIG_USELIB / CONFIG_ELF_CORE are not set, in which case the
 * corresponding hook is simply absent.
 */
88 static struct linux_binfmt elf_format = {
89 	.module		= THIS_MODULE,
90 	.load_binary	= load_elf_binary,
91 	.load_shlib	= load_elf_library,
92 	.core_dump	= elf_core_dump,
93 	.min_coredump	= ELF_EXEC_PAGESIZE,
94 };
95 
96 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
97 
98 static int set_brk(unsigned long start, unsigned long end, int prot)
99 {
100 	start = ELF_PAGEALIGN(start);
101 	end = ELF_PAGEALIGN(end);
102 	if (end > start) {
103 		/*
104 		 * Map the last of the bss segment.
105 		 * If the header is requesting these pages to be
106 		 * executable, honour that (ppc32 needs this).
107 		 */
108 		int error = vm_brk_flags(start, end - start,
109 				prot & PROT_EXEC ? VM_EXEC : 0);
110 		if (error)
111 			return error;
112 	}
113 	current->mm->start_brk = current->mm->brk = end;
114 	return 0;
115 }
116 
117 /* We need to explicitly zero any fractional pages
118    after the data section (i.e. bss).  This would
119    contain the junk from the file that should not
120    be in memory
121  */
122 static int padzero(unsigned long elf_bss)
123 {
124 	unsigned long nbyte;
125 
126 	nbyte = ELF_PAGEOFFSET(elf_bss);
127 	if (nbyte) {
128 		nbyte = ELF_MIN_ALIGN - nbyte;
129 		if (clear_user((void __user *) elf_bss, nbyte))
130 			return -EFAULT;
131 	}
132 	return 0;
133 }
134 
/*
 * Stack manipulation helpers, written once per stack growth direction.
 *
 *   STACK_ADD(sp, items)   - pointer 'items' elf_addr_t slots further along
 *                            the direction of stack growth
 *   STACK_ROUND(sp, items) - address of sp advanced by 'items' in the
 *                            direction of growth, rounded to 16 bytes
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack by modifying
 *                            'sp' in place and yield the start of the
 *                            reserved area
 *
 * Every macro argument is parenthesised in the expansion so that passing
 * an expression (e.g. "a + b") as an argument gives the expected result.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
149 
150 #ifndef ELF_BASE_PLATFORM
151 /*
152  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
153  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
154  * will be copied to the user stack in the same manner as AT_PLATFORM.
155  */
156 #define ELF_BASE_PLATFORM NULL
157 #endif
158 
/**
 * create_elf_tables() - lay out argc, argv[], envp[] and auxv on the user stack
 * @bprm:	   exec state; ->p, ->argc, ->envc, ->secureexec, ->exec,
 *		   ->interp_flags and ->interp_data are read. ->p is updated
 *		   to the rounded stack address (and ->exec is overwritten
 *		   when CONFIG_STACK_GROWSUP is set).
 * @exec:	   ELF header of the program; e_phoff, e_phnum and e_entry
 *		   feed AT_PHDR, AT_PHNUM and AT_ENTRY.
 * @load_addr:	   added to e_phoff to form AT_PHDR.
 * @interp_load_addr: value reported as AT_BASE.
 *
 * Copies the platform strings and 16 random bytes to the stack, fills
 * current->mm->saved_auxv with the auxiliary vector, then writes argc, the
 * argv pointers, the envp pointers (each list NULL terminated) and a copy
 * of the auxiliary vector to the user stack. Also records arg_end,
 * env_start and env_end in current->mm.
 *
 * Return: 0 on success, -EFAULT if a user-space access fails or the stack
 *	   vma cannot be found/extended, -EINVAL if an argument or
 *	   environment string has an unacceptable length.
 */
159 static int
160 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
161 		unsigned long load_addr, unsigned long interp_load_addr)
162 {
163 	unsigned long p = bprm->p;
164 	int argc = bprm->argc;
165 	int envc = bprm->envc;
166 	elf_addr_t __user *sp;
167 	elf_addr_t __user *u_platform;
168 	elf_addr_t __user *u_base_platform;
169 	elf_addr_t __user *u_rand_bytes;
170 	const char *k_platform = ELF_PLATFORM;
171 	const char *k_base_platform = ELF_BASE_PLATFORM;
172 	unsigned char k_rand_bytes[16];
173 	int items;
174 	elf_addr_t *elf_info;
175 	int ei_index = 0;
176 	const struct cred *cred = current_cred();
177 	struct vm_area_struct *vma;
178 
179 	/*
180 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
181 	 * evictions by the processes running on the same package. One
182 	 * thing we can do is to shuffle the initial stack for them.
183 	 */
184 
185 	p = arch_align_stack(p);
186 
187 	/*
188 	 * If this architecture has a platform capability string, copy it
189 	 * to userspace.  In some cases (Sparc), this info is impossible
190 	 * for userspace to get any other way, in others (i386) it is
191 	 * merely difficult.
192 	 */
193 	u_platform = NULL;
194 	if (k_platform) {
195 		size_t len = strlen(k_platform) + 1;
196 
197 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
198 		if (__copy_to_user(u_platform, k_platform, len))
199 			return -EFAULT;
200 	}
201 
202 	/*
203 	 * If this architecture has a "base" platform capability
204 	 * string, copy it to userspace.
205 	 */
206 	u_base_platform = NULL;
207 	if (k_base_platform) {
208 		size_t len = strlen(k_base_platform) + 1;
209 
210 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
211 		if (__copy_to_user(u_base_platform, k_base_platform, len))
212 			return -EFAULT;
213 	}
214 
215 	/*
216 	 * Generate 16 random bytes for userspace PRNG seeding.
217 	 */
218 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
219 	u_rand_bytes = (elf_addr_t __user *)
220 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
221 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
222 		return -EFAULT;
223 
224 	/* Create the ELF interpreter info */
225 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
226 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
	/* Each auxv entry occupies two consecutive slots: the id, then the value. */
227 #define NEW_AUX_ENT(id, val) \
228 	do { \
229 		elf_info[ei_index++] = id; \
230 		elf_info[ei_index++] = val; \
231 	} while (0)
232 
233 #ifdef ARCH_DLINFO
234 	/*
235 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
236 	 * AUXV.
237 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
238 	 * ARCH_DLINFO changes
239 	 */
240 	ARCH_DLINFO;
241 #endif
242 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
243 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
244 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
245 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
246 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
247 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
248 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
249 	NEW_AUX_ENT(AT_FLAGS, 0);
250 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
251 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
252 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
253 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
254 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
255 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
256 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
257 #ifdef ELF_HWCAP2
258 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
259 #endif
260 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
261 	if (k_platform) {
262 		NEW_AUX_ENT(AT_PLATFORM,
263 			    (elf_addr_t)(unsigned long)u_platform);
264 	}
265 	if (k_base_platform) {
266 		NEW_AUX_ENT(AT_BASE_PLATFORM,
267 			    (elf_addr_t)(unsigned long)u_base_platform);
268 	}
269 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
270 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
271 	}
272 #undef NEW_AUX_ENT
273 	/* AT_NULL is zero; clear the rest too */
274 	memset(&elf_info[ei_index], 0,
275 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
276 
277 	/* And advance past the AT_NULL entry.  */
278 	ei_index += 2;
279 
280 	sp = STACK_ADD(p, ei_index);
281 
	/* argv[] plus its NULL, envp[] plus its NULL, and one slot for argc. */
282 	items = (argc + 1) + (envc + 1) + 1;
283 	bprm->p = STACK_ROUND(sp, items);
284 
285 	/* Point sp at the lowest address on the stack */
286 #ifdef CONFIG_STACK_GROWSUP
287 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
288 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
289 #else
290 	sp = (elf_addr_t __user *)bprm->p;
291 #endif
292 
293 
294 	/*
295 	 * Grow the stack manually; some architectures have a limit on how
296 	 * far ahead a user-space access may be in order to grow the stack.
297 	 */
298 	vma = find_extend_vma(current->mm, bprm->p);
299 	if (!vma)
300 		return -EFAULT;
301 
302 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
303 	if (__put_user(argc, sp++))
304 		return -EFAULT;
305 
306 	/* Populate list of argv pointers back to argv strings. */
307 	p = current->mm->arg_end = current->mm->arg_start;
308 	while (argc-- > 0) {
309 		size_t len;
310 		if (__put_user((elf_addr_t)p, sp++))
311 			return -EFAULT;
		/*
		 * NOTE(review): stepping p by len assumes strnlen_user()
		 * counts the terminating NUL and returns 0 on a fault -
		 * confirm against its definition.
		 */
312 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 		if (!len || len > MAX_ARG_STRLEN)
314 			return -EINVAL;
315 		p += len;
316 	}
317 	if (__put_user(0, sp++))
318 		return -EFAULT;
319 	current->mm->arg_end = p;
320 
321 	/* Populate list of envp pointers back to envp strings. */
322 	current->mm->env_end = current->mm->env_start = p;
323 	while (envc-- > 0) {
324 		size_t len;
325 		if (__put_user((elf_addr_t)p, sp++))
326 			return -EFAULT;
327 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 		if (!len || len > MAX_ARG_STRLEN)
329 			return -EINVAL;
330 		p += len;
331 	}
332 	if (__put_user(0, sp++))
333 		return -EFAULT;
334 	current->mm->env_end = p;
335 
336 	/* Put the elf_info on the stack in the right place.  */
337 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
338 		return -EFAULT;
339 	return 0;
340 }
341 
342 #ifndef elf_map
343 
344 static unsigned long elf_map(struct file *filep, unsigned long addr,
345 		struct elf_phdr *eppnt, int prot, int type,
346 		unsigned long total_size)
347 {
348 	unsigned long map_addr;
349 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
350 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
351 	addr = ELF_PAGESTART(addr);
352 	size = ELF_PAGEALIGN(size);
353 
354 	/* mmap() will return -EINVAL if given a zero size, but a
355 	 * segment with zero filesize is perfectly valid */
356 	if (!size)
357 		return addr;
358 
359 	/*
360 	* total_size is the size of the ELF (interpreter) image.
361 	* The _first_ mmap needs to know the full size, otherwise
362 	* randomization might put this image into an overlapping
363 	* position with the ELF binary image. (since size < total_size)
364 	* So we first map the 'big' image - and unmap the remainder at
365 	* the end. (which unmap is needed for ELF images with holes.)
366 	*/
367 	if (total_size) {
368 		total_size = ELF_PAGEALIGN(total_size);
369 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
370 		if (!BAD_ADDR(map_addr))
371 			vm_munmap(map_addr+size, total_size-size);
372 	} else
373 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
374 
375 	return(map_addr);
376 }
377 
378 #endif /* !elf_map */
379 
380 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
381 {
382 	int i, first_idx = -1, last_idx = -1;
383 
384 	for (i = 0; i < nr; i++) {
385 		if (cmds[i].p_type == PT_LOAD) {
386 			last_idx = i;
387 			if (first_idx == -1)
388 				first_idx = i;
389 		}
390 	}
391 	if (first_idx == -1)
392 		return 0;
393 
394 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
395 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
396 }
397 
398 /**
399  * load_elf_phdrs() - load ELF program headers
400  * @elf_ex:   ELF header of the binary whose program headers should be loaded
401  * @elf_file: the opened ELF binary file
402  *
403  * Loads ELF program headers from the binary file elf_file, which has the ELF
404  * header pointed to by elf_ex, into a newly allocated array. The caller is
405  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
406  */
407 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
408 				       struct file *elf_file)
409 {
410 	struct elf_phdr *elf_phdata = NULL;
411 	int retval, size, err = -1;
412 	loff_t pos = elf_ex->e_phoff;
413 
414 	/*
415 	 * If the size of this structure has changed, then punt, since
416 	 * we will be doing the wrong thing.
417 	 */
418 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
419 		goto out;
420 
421 	/* Sanity check the number of program headers... */
422 	if (elf_ex->e_phnum < 1 ||
423 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
424 		goto out;
425 
426 	/* ...and their total size. */
427 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
428 	if (size > ELF_MIN_ALIGN)
429 		goto out;
430 
431 	elf_phdata = kmalloc(size, GFP_KERNEL);
432 	if (!elf_phdata)
433 		goto out;
434 
435 	/* Read in the program headers */
436 	retval = kernel_read(elf_file, elf_phdata, size, &pos);
437 	if (retval != size) {
438 		err = (retval < 0) ? retval : -EIO;
439 		goto out;
440 	}
441 
442 	/* Success! */
443 	err = 0;
444 out:
445 	if (err) {
446 		kfree(elf_phdata);
447 		elf_phdata = NULL;
448 	}
449 	return elf_phdata;
450 }
451 
452 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
453 
454 /**
455  * struct arch_elf_state - arch-specific ELF loading state
456  *
457  * This structure is used to preserve architecture specific data during
458  * the loading of an ELF file, throughout the checking of architecture
459  * specific ELF headers & through to the point where the ELF load is
460  * known to be proceeding (ie. SET_PERSONALITY).
461  *
462  * This implementation is a dummy for architectures which require no
463  * specific state.
464  */
465 struct arch_elf_state {
	/* Intentionally empty: this fallback variant carries no state. */
466 };
467 
/* Initializer matching the empty fallback structure above. */
468 #define INIT_ARCH_ELF_STATE {}
469 
470 /**
471  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
472  * @ehdr:	The main ELF header
473  * @phdr:	The program header to check
474  * @elf:	The open ELF file
475  * @is_interp:	True if the phdr is from the interpreter of the ELF being
476  *		loaded, else false.
477  * @state:	Architecture-specific state preserved throughout the process
478  *		of loading the ELF.
479  *
480  * Inspects the program header phdr to validate its correctness and/or
481  * suitability for the system. Called once per ELF program header in the
482  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
483  * interpreter.
484  *
485  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
486  *         with that return code.
487  */
488 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
489 				   struct elf_phdr *phdr,
490 				   struct file *elf, bool is_interp,
491 				   struct arch_elf_state *state)
492 {
493 	/*
	 * Dummy implementation used when CONFIG_ARCH_BINFMT_ELF_STATE is
	 * not set: none of the arguments is examined, always proceed.
	 */
494 	return 0;
495 }
496 
497 /**
498  * arch_check_elf() - check an ELF executable
499  * @ehdr:	The main ELF header
500  * @has_interp:	True if the ELF has an interpreter, else false.
501  * @interp_ehdr: The interpreter's ELF header
502  * @state:	Architecture-specific state preserved throughout the process
503  *		of loading the ELF.
504  *
505  * Provides a final opportunity for architecture code to reject the loading
506  * of the ELF & cause an exec syscall to return an error. This is called after
507  * all program headers to be checked by arch_elf_pt_proc have been.
508  *
509  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
510  *         with that return code.
511  */
512 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
513 				 struct elfhdr *interp_ehdr,
514 				 struct arch_elf_state *state)
515 {
516 	/*
	 * Dummy implementation used when CONFIG_ARCH_BINFMT_ELF_STATE is
	 * not set: none of the arguments is examined, always proceed.
	 */
517 	return 0;
518 }
519 
520 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
521 
522 /* This is much more generalized than the library routine read function,
523    so we keep this separate.  Technically the library read function
524    is only provided so that we can read a.out libraries that have
525    an ELF header */
526 
/**
 * load_elf_interp() - map the ELF interpreter into the current address space
 * @interp_elf_ex:	ELF header of the interpreter
 * @interpreter:	opened interpreter file
 * @interp_map_addr:	if it still holds zero, receives the result of the
 *			first elf_map() call (stored before that result is
 *			checked with BAD_ADDR())
 * @no_base:		when non-zero and the interpreter is ET_DYN, the first
 *			PT_LOAD is requested at address 0 (load_addr is set
 *			to -p_vaddr) so the mapping code picks the location
 * @interp_elf_phdata:	program headers of the interpreter
 *
 * Maps every PT_LOAD segment with elf_map(), tracking the highest end of
 * file-backed data (elf_bss) and the highest end of the memory image
 * (last_bss). The remainder of the last file page is then zeroed with
 * padzero() and any further bss is mapped with vm_brk_flags().
 *
 * Return: the load address on success. On failure: ~0UL if the initial
 *	   sanity checks fail, the value returned by elf_map() if it is a
 *	   BAD_ADDR(), or a negative errno converted to unsigned long
 *	   (-EINVAL, -ENOMEM, -EFAULT or the vm_brk_flags() error).
 */
527 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
528 		struct file *interpreter, unsigned long *interp_map_addr,
529 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
530 {
531 	struct elf_phdr *eppnt;
532 	unsigned long load_addr = 0;
533 	int load_addr_set = 0;
534 	unsigned long last_bss = 0, elf_bss = 0;
535 	int bss_prot = 0;
536 	unsigned long error = ~0UL;
537 	unsigned long total_size;
538 	int i;
539 
540 	/* First of all, some simple consistency checks */
541 	if (interp_elf_ex->e_type != ET_EXEC &&
542 	    interp_elf_ex->e_type != ET_DYN)
543 		goto out;
544 	if (!elf_check_arch(interp_elf_ex))
545 		goto out;
546 	if (!interpreter->f_op->mmap)
547 		goto out;
548 
549 	total_size = total_mapping_size(interp_elf_phdata,
550 					interp_elf_ex->e_phnum);
551 	if (!total_size) {
552 		error = -EINVAL;
553 		goto out;
554 	}
555 
556 	eppnt = interp_elf_phdata;
557 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
558 		if (eppnt->p_type == PT_LOAD) {
559 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
560 			int elf_prot = 0;
561 			unsigned long vaddr = 0;
562 			unsigned long k, map_addr;
563 
564 			if (eppnt->p_flags & PF_R)
565 		    		elf_prot = PROT_READ;
566 			if (eppnt->p_flags & PF_W)
567 				elf_prot |= PROT_WRITE;
568 			if (eppnt->p_flags & PF_X)
569 				elf_prot |= PROT_EXEC;
570 			vaddr = eppnt->p_vaddr;
571 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
572 				elf_type |= MAP_FIXED;
573 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
574 				load_addr = -vaddr;
575 
576 			map_addr = elf_map(interpreter, load_addr + vaddr,
577 					eppnt, elf_prot, elf_type, total_size);
			/* Only the first mapping reserves the whole image. */
578 			total_size = 0;
579 			if (!*interp_map_addr)
580 				*interp_map_addr = map_addr;
581 			error = map_addr;
582 			if (BAD_ADDR(map_addr))
583 				goto out;
584 
585 			if (!load_addr_set &&
586 			    interp_elf_ex->e_type == ET_DYN) {
587 				load_addr = map_addr - ELF_PAGESTART(vaddr);
588 				load_addr_set = 1;
589 			}
590 
591 			/*
592 			 * Check to see if the section's size will overflow the
593 			 * allowed task size. Note that p_filesz must always be
594 			 * <= p_memsz so it's only necessary to check p_memsz.
595 			 */
596 			k = load_addr + eppnt->p_vaddr;
597 			if (BAD_ADDR(k) ||
598 			    eppnt->p_filesz > eppnt->p_memsz ||
599 			    eppnt->p_memsz > TASK_SIZE ||
600 			    TASK_SIZE - eppnt->p_memsz < k) {
601 				error = -ENOMEM;
602 				goto out;
603 			}
604 
605 			/*
606 			 * Find the end of the file mapping for this phdr, and
607 			 * keep track of the largest address we see for this.
608 			 */
609 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
610 			if (k > elf_bss)
611 				elf_bss = k;
612 
613 			/*
614 			 * Do the same thing for the memory mapping - between
615 			 * elf_bss and last_bss is the bss section.
616 			 */
617 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
618 			if (k > last_bss) {
619 				last_bss = k;
620 				bss_prot = elf_prot;
621 			}
622 		}
623 	}
624 
625 	/*
626 	 * Now fill out the bss section: first pad the last page from
627 	 * the file up to the page boundary, and zero it from elf_bss
628 	 * up to the end of the page.
629 	 */
630 	if (padzero(elf_bss)) {
631 		error = -EFAULT;
632 		goto out;
633 	}
634 	/*
635 	 * Next, align both the file and mem bss up to the page size,
636 	 * since this is where elf_bss was just zeroed up to, and where
637 	 * last_bss will end after the vm_brk_flags() below.
638 	 */
639 	elf_bss = ELF_PAGEALIGN(elf_bss);
640 	last_bss = ELF_PAGEALIGN(last_bss);
641 	/* Finally, if there is still more bss to allocate, do it. */
642 	if (last_bss > elf_bss) {
643 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
644 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
645 		if (error)
646 			goto out;
647 	}
648 
649 	error = load_addr;
650 out:
651 	return error;
652 }
653 
654 /*
655  * These are the functions used to load ELF style executables and shared
656  * libraries.  There is no binary dependent code anywhere else.
657  */
658 
659 #ifndef STACK_RND_MASK
660 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
661 #endif
662 
663 static unsigned long randomize_stack_top(unsigned long stack_top)
664 {
665 	unsigned long random_variable = 0;
666 
667 	if (current->flags & PF_RANDOMIZE) {
668 		random_variable = get_random_long();
669 		random_variable &= STACK_RND_MASK;
670 		random_variable <<= PAGE_SHIFT;
671 	}
672 #ifdef CONFIG_STACK_GROWSUP
673 	return PAGE_ALIGN(stack_top) + random_variable;
674 #else
675 	return PAGE_ALIGN(stack_top) - random_variable;
676 #endif
677 }
678 
679 static int load_elf_binary(struct linux_binprm *bprm)
680 {
681 	struct file *interpreter = NULL; /* to shut gcc up */
682  	unsigned long load_addr = 0, load_bias = 0;
683 	int load_addr_set = 0;
684 	char * elf_interpreter = NULL;
685 	unsigned long error;
686 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
687 	unsigned long elf_bss, elf_brk;
688 	int bss_prot = 0;
689 	int retval, i;
690 	unsigned long elf_entry;
691 	unsigned long interp_load_addr = 0;
692 	unsigned long start_code, end_code, start_data, end_data;
693 	unsigned long reloc_func_desc __maybe_unused = 0;
694 	int executable_stack = EXSTACK_DEFAULT;
695 	struct pt_regs *regs = current_pt_regs();
696 	struct {
697 		struct elfhdr elf_ex;
698 		struct elfhdr interp_elf_ex;
699 	} *loc;
700 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
701 	loff_t pos;
702 
703 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
704 	if (!loc) {
705 		retval = -ENOMEM;
706 		goto out_ret;
707 	}
708 
709 	/* Get the exec-header */
710 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
711 
712 	retval = -ENOEXEC;
713 	/* First of all, some simple consistency checks */
714 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 		goto out;
716 
717 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
718 		goto out;
719 	if (!elf_check_arch(&loc->elf_ex))
720 		goto out;
721 	if (!bprm->file->f_op->mmap)
722 		goto out;
723 
724 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
725 	if (!elf_phdata)
726 		goto out;
727 
728 	elf_ppnt = elf_phdata;
729 	elf_bss = 0;
730 	elf_brk = 0;
731 
732 	start_code = ~0UL;
733 	end_code = 0;
734 	start_data = 0;
735 	end_data = 0;
736 
737 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
738 		if (elf_ppnt->p_type == PT_INTERP) {
739 			/* This is the program interpreter used for
740 			 * shared libraries - for now assume that this
741 			 * is an a.out format binary
742 			 */
743 			retval = -ENOEXEC;
744 			if (elf_ppnt->p_filesz > PATH_MAX ||
745 			    elf_ppnt->p_filesz < 2)
746 				goto out_free_ph;
747 
748 			retval = -ENOMEM;
749 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
750 						  GFP_KERNEL);
751 			if (!elf_interpreter)
752 				goto out_free_ph;
753 
754 			pos = elf_ppnt->p_offset;
755 			retval = kernel_read(bprm->file, elf_interpreter,
756 					     elf_ppnt->p_filesz, &pos);
757 			if (retval != elf_ppnt->p_filesz) {
758 				if (retval >= 0)
759 					retval = -EIO;
760 				goto out_free_interp;
761 			}
762 			/* make sure path is NULL terminated */
763 			retval = -ENOEXEC;
764 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
765 				goto out_free_interp;
766 
767 			interpreter = open_exec(elf_interpreter);
768 			retval = PTR_ERR(interpreter);
769 			if (IS_ERR(interpreter))
770 				goto out_free_interp;
771 
772 			/*
773 			 * If the binary is not readable then enforce
774 			 * mm->dumpable = 0 regardless of the interpreter's
775 			 * permissions.
776 			 */
777 			would_dump(bprm, interpreter);
778 
779 			/* Get the exec headers */
780 			pos = 0;
781 			retval = kernel_read(interpreter, &loc->interp_elf_ex,
782 					     sizeof(loc->interp_elf_ex), &pos);
783 			if (retval != sizeof(loc->interp_elf_ex)) {
784 				if (retval >= 0)
785 					retval = -EIO;
786 				goto out_free_dentry;
787 			}
788 
789 			break;
790 		}
791 		elf_ppnt++;
792 	}
793 
794 	elf_ppnt = elf_phdata;
795 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
796 		switch (elf_ppnt->p_type) {
797 		case PT_GNU_STACK:
798 			if (elf_ppnt->p_flags & PF_X)
799 				executable_stack = EXSTACK_ENABLE_X;
800 			else
801 				executable_stack = EXSTACK_DISABLE_X;
802 			break;
803 
804 		case PT_LOPROC ... PT_HIPROC:
805 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
806 						  bprm->file, false,
807 						  &arch_state);
808 			if (retval)
809 				goto out_free_dentry;
810 			break;
811 		}
812 
813 	/* Some simple consistency checks for the interpreter */
814 	if (elf_interpreter) {
815 		retval = -ELIBBAD;
816 		/* Not an ELF interpreter */
817 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
818 			goto out_free_dentry;
819 		/* Verify the interpreter has a valid arch */
820 		if (!elf_check_arch(&loc->interp_elf_ex))
821 			goto out_free_dentry;
822 
823 		/* Load the interpreter program headers */
824 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
825 						   interpreter);
826 		if (!interp_elf_phdata)
827 			goto out_free_dentry;
828 
829 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
830 		elf_ppnt = interp_elf_phdata;
831 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
832 			switch (elf_ppnt->p_type) {
833 			case PT_LOPROC ... PT_HIPROC:
834 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
835 							  elf_ppnt, interpreter,
836 							  true, &arch_state);
837 				if (retval)
838 					goto out_free_dentry;
839 				break;
840 			}
841 	}
842 
843 	/*
844 	 * Allow arch code to reject the ELF at this point, whilst it's
845 	 * still possible to return an error to the code that invoked
846 	 * the exec syscall.
847 	 */
848 	retval = arch_check_elf(&loc->elf_ex,
849 				!!interpreter, &loc->interp_elf_ex,
850 				&arch_state);
851 	if (retval)
852 		goto out_free_dentry;
853 
854 	/* Flush all traces of the currently running executable */
855 	retval = flush_old_exec(bprm);
856 	if (retval)
857 		goto out_free_dentry;
858 
859 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
860 	   may depend on the personality.  */
861 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
862 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
863 		current->personality |= READ_IMPLIES_EXEC;
864 
865 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
866 		current->flags |= PF_RANDOMIZE;
867 
868 	setup_new_exec(bprm);
869 	install_exec_creds(bprm);
870 
871 	/* Do this so that we can load the interpreter, if need be.  We will
872 	   change some of these later */
873 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
874 				 executable_stack);
875 	if (retval < 0)
876 		goto out_free_dentry;
877 
878 	current->mm->start_stack = bprm->p;
879 
880 	/* Now we do a little grungy work by mmapping the ELF image into
881 	   the correct location in memory. */
882 	for(i = 0, elf_ppnt = elf_phdata;
883 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
884 		int elf_prot = 0, elf_flags;
885 		unsigned long k, vaddr;
886 		unsigned long total_size = 0;
887 
888 		if (elf_ppnt->p_type != PT_LOAD)
889 			continue;
890 
891 		if (unlikely (elf_brk > elf_bss)) {
892 			unsigned long nbyte;
893 
894 			/* There was a PT_LOAD segment with p_memsz > p_filesz
895 			   before this one. Map anonymous pages, if needed,
896 			   and clear the area.  */
897 			retval = set_brk(elf_bss + load_bias,
898 					 elf_brk + load_bias,
899 					 bss_prot);
900 			if (retval)
901 				goto out_free_dentry;
902 			nbyte = ELF_PAGEOFFSET(elf_bss);
903 			if (nbyte) {
904 				nbyte = ELF_MIN_ALIGN - nbyte;
905 				if (nbyte > elf_brk - elf_bss)
906 					nbyte = elf_brk - elf_bss;
907 				if (clear_user((void __user *)elf_bss +
908 							load_bias, nbyte)) {
909 					/*
910 					 * This bss-zeroing can fail if the ELF
911 					 * file specifies odd protections. So
912 					 * we don't check the return value
913 					 */
914 				}
915 			}
916 		}
917 
918 		if (elf_ppnt->p_flags & PF_R)
919 			elf_prot |= PROT_READ;
920 		if (elf_ppnt->p_flags & PF_W)
921 			elf_prot |= PROT_WRITE;
922 		if (elf_ppnt->p_flags & PF_X)
923 			elf_prot |= PROT_EXEC;
924 
925 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
926 
927 		vaddr = elf_ppnt->p_vaddr;
928 		/*
929 		 * If we are loading ET_EXEC or we have already performed
930 		 * the ET_DYN load_addr calculations, proceed normally.
931 		 */
932 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
933 			elf_flags |= MAP_FIXED;
934 		} else if (loc->elf_ex.e_type == ET_DYN) {
935 			/*
936 			 * This logic is run once for the first LOAD Program
937 			 * Header for ET_DYN binaries to calculate the
938 			 * randomization (load_bias) for all the LOAD
939 			 * Program Headers, and to calculate the entire
940 			 * size of the ELF mapping (total_size). (Note that
941 			 * load_addr_set is set to true later once the
942 			 * initial mapping is performed.)
943 			 *
944 			 * There are effectively two types of ET_DYN
945 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
946 			 * and loaders (ET_DYN without INTERP, since they
947 			 * _are_ the ELF interpreter). The loaders must
948 			 * be loaded away from programs since the program
949 			 * may otherwise collide with the loader (especially
950 			 * for ET_EXEC which does not have a randomized
951 			 * position). For example to handle invocations of
952 			 * "./ld.so someprog" to test out a new version of
953 			 * the loader, the subsequent program that the
954 			 * loader loads must avoid the loader itself, so
955 			 * they cannot share the same load range. Sufficient
956 			 * room for the brk must be allocated with the
957 			 * loader as well, since brk must be available with
958 			 * the loader.
959 			 *
960 			 * Therefore, programs are loaded offset from
961 			 * ELF_ET_DYN_BASE and loaders are loaded into the
962 			 * independently randomized mmap region (0 load_bias
963 			 * without MAP_FIXED).
964 			 */
965 			if (elf_interpreter) {
966 				load_bias = ELF_ET_DYN_BASE;
967 				if (current->flags & PF_RANDOMIZE)
968 					load_bias += arch_mmap_rnd();
969 				elf_flags |= MAP_FIXED;
970 			} else
971 				load_bias = 0;
972 
973 			/*
974 			 * Since load_bias is used for all subsequent loading
975 			 * calculations, we must lower it by the first vaddr
976 			 * so that the remaining calculations based on the
977 			 * ELF vaddrs will be correctly offset. The result
978 			 * is then page aligned.
979 			 */
980 			load_bias = ELF_PAGESTART(load_bias - vaddr);
981 
982 			total_size = total_mapping_size(elf_phdata,
983 							loc->elf_ex.e_phnum);
984 			if (!total_size) {
985 				retval = -EINVAL;
986 				goto out_free_dentry;
987 			}
988 		}
989 
990 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
991 				elf_prot, elf_flags, total_size);
992 		if (BAD_ADDR(error)) {
993 			retval = IS_ERR((void *)error) ?
994 				PTR_ERR((void*)error) : -EINVAL;
995 			goto out_free_dentry;
996 		}
997 
998 		if (!load_addr_set) {
999 			load_addr_set = 1;
1000 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1001 			if (loc->elf_ex.e_type == ET_DYN) {
1002 				load_bias += error -
1003 				             ELF_PAGESTART(load_bias + vaddr);
1004 				load_addr += load_bias;
1005 				reloc_func_desc = load_bias;
1006 			}
1007 		}
1008 		k = elf_ppnt->p_vaddr;
1009 		if (k < start_code)
1010 			start_code = k;
1011 		if (start_data < k)
1012 			start_data = k;
1013 
1014 		/*
1015 		 * Check to see if the section's size will overflow the
1016 		 * allowed task size. Note that p_filesz must always be
1017 		 * <= p_memsz so it is only necessary to check p_memsz.
1018 		 */
1019 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1020 		    elf_ppnt->p_memsz > TASK_SIZE ||
1021 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1022 			/* set_brk can never work. Avoid overflows. */
1023 			retval = -EINVAL;
1024 			goto out_free_dentry;
1025 		}
1026 
1027 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1028 
1029 		if (k > elf_bss)
1030 			elf_bss = k;
1031 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1032 			end_code = k;
1033 		if (end_data < k)
1034 			end_data = k;
1035 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1036 		if (k > elf_brk) {
1037 			bss_prot = elf_prot;
1038 			elf_brk = k;
1039 		}
1040 	}
1041 
1042 	loc->elf_ex.e_entry += load_bias;
1043 	elf_bss += load_bias;
1044 	elf_brk += load_bias;
1045 	start_code += load_bias;
1046 	end_code += load_bias;
1047 	start_data += load_bias;
1048 	end_data += load_bias;
1049 
1050 	/* Calling set_brk effectively mmaps the pages that we need
1051 	 * for the bss and break sections.  We must do this before
1052 	 * mapping in the interpreter, to make sure it doesn't wind
1053 	 * up getting placed where the bss needs to go.
1054 	 */
1055 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1056 	if (retval)
1057 		goto out_free_dentry;
1058 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1059 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1060 		goto out_free_dentry;
1061 	}
1062 
1063 	if (elf_interpreter) {
1064 		unsigned long interp_map_addr = 0;
1065 
1066 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1067 					    interpreter,
1068 					    &interp_map_addr,
1069 					    load_bias, interp_elf_phdata);
1070 		if (!IS_ERR((void *)elf_entry)) {
1071 			/*
1072 			 * load_elf_interp() returns relocation
1073 			 * adjustment
1074 			 */
1075 			interp_load_addr = elf_entry;
1076 			elf_entry += loc->interp_elf_ex.e_entry;
1077 		}
1078 		if (BAD_ADDR(elf_entry)) {
1079 			retval = IS_ERR((void *)elf_entry) ?
1080 					(int)elf_entry : -EINVAL;
1081 			goto out_free_dentry;
1082 		}
1083 		reloc_func_desc = interp_load_addr;
1084 
1085 		allow_write_access(interpreter);
1086 		fput(interpreter);
1087 		kfree(elf_interpreter);
1088 	} else {
1089 		elf_entry = loc->elf_ex.e_entry;
1090 		if (BAD_ADDR(elf_entry)) {
1091 			retval = -EINVAL;
1092 			goto out_free_dentry;
1093 		}
1094 	}
1095 
1096 	kfree(interp_elf_phdata);
1097 	kfree(elf_phdata);
1098 
1099 	set_binfmt(&elf_format);
1100 
1101 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1102 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1103 	if (retval < 0)
1104 		goto out;
1105 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1106 
1107 	retval = create_elf_tables(bprm, &loc->elf_ex,
1108 			  load_addr, interp_load_addr);
1109 	if (retval < 0)
1110 		goto out;
1111 	/* N.B. passed_fileno might not be initialized? */
1112 	current->mm->end_code = end_code;
1113 	current->mm->start_code = start_code;
1114 	current->mm->start_data = start_data;
1115 	current->mm->end_data = end_data;
1116 	current->mm->start_stack = bprm->p;
1117 
1118 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1119 		current->mm->brk = current->mm->start_brk =
1120 			arch_randomize_brk(current->mm);
1121 #ifdef compat_brk_randomized
1122 		current->brk_randomized = 1;
1123 #endif
1124 	}
1125 
1126 	if (current->personality & MMAP_PAGE_ZERO) {
1127 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1128 		   and some applications "depend" upon this behavior.
1129 		   Since we do not have the power to recompile these, we
1130 		   emulate the SVr4 behavior. Sigh. */
1131 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1132 				MAP_FIXED | MAP_PRIVATE, 0);
1133 	}
1134 
1135 #ifdef ELF_PLAT_INIT
1136 	/*
1137 	 * The ABI may specify that certain registers be set up in special
1138 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1139 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1140 	 * that the e_entry field is the address of the function descriptor
1141 	 * for the startup routine, rather than the address of the startup
1142 	 * routine itself.  This macro performs whatever initialization to
1143 	 * the regs structure is required as well as any relocations to the
1144 	 * function descriptor entries when executing dynamically linked apps.
1145 	 */
1146 	ELF_PLAT_INIT(regs, reloc_func_desc);
1147 #endif
1148 
1149 	start_thread(regs, elf_entry, bprm->p);
1150 	retval = 0;
1151 out:
1152 	kfree(loc);
1153 out_ret:
1154 	return retval;
1155 
1156 	/* error cleanup */
1157 out_free_dentry:
1158 	kfree(interp_elf_phdata);
1159 	allow_write_access(interpreter);
1160 	if (interpreter)
1161 		fput(interpreter);
1162 out_free_interp:
1163 	kfree(elf_interpreter);
1164 out_free_ph:
1165 	kfree(elf_phdata);
1166 	goto out;
1167 }
1168 
1169 #ifdef CONFIG_USELIB
1170 /* This is really simpleminded and specialized - we are loading an
1171    a.out library that is given an ELF header. */
1172 static int load_elf_library(struct file *file)
1173 {
1174 	struct elf_phdr *elf_phdata;
1175 	struct elf_phdr *eppnt;
1176 	unsigned long elf_bss, bss, len;
1177 	int retval, error, i, j;
1178 	struct elfhdr elf_ex;
1179 	loff_t pos = 0;
1180 
1181 	error = -ENOEXEC;
1182 	retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
1183 	if (retval != sizeof(elf_ex))
1184 		goto out;
1185 
1186 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1187 		goto out;
1188 
1189 	/* First of all, some simple consistency checks */
1190 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1191 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1192 		goto out;
1193 
1194 	/* Now read in all of the header information */
1195 
1196 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1197 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1198 
1199 	error = -ENOMEM;
1200 	elf_phdata = kmalloc(j, GFP_KERNEL);
1201 	if (!elf_phdata)
1202 		goto out;
1203 
1204 	eppnt = elf_phdata;
1205 	error = -ENOEXEC;
1206 	pos =  elf_ex.e_phoff;
1207 	retval = kernel_read(file, eppnt, j, &pos);
1208 	if (retval != j)
1209 		goto out_free_ph;
1210 
1211 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1212 		if ((eppnt + i)->p_type == PT_LOAD)
1213 			j++;
1214 	if (j != 1)
1215 		goto out_free_ph;
1216 
1217 	while (eppnt->p_type != PT_LOAD)
1218 		eppnt++;
1219 
1220 	/* Now use mmap to map the library into memory. */
1221 	error = vm_mmap(file,
1222 			ELF_PAGESTART(eppnt->p_vaddr),
1223 			(eppnt->p_filesz +
1224 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1225 			PROT_READ | PROT_WRITE | PROT_EXEC,
1226 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1227 			(eppnt->p_offset -
1228 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1229 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1230 		goto out_free_ph;
1231 
1232 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1233 	if (padzero(elf_bss)) {
1234 		error = -EFAULT;
1235 		goto out_free_ph;
1236 	}
1237 
1238 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1239 			    ELF_MIN_ALIGN - 1);
1240 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1241 	if (bss > len) {
1242 		error = vm_brk(len, bss - len);
1243 		if (error)
1244 			goto out_free_ph;
1245 	}
1246 	error = 0;
1247 
1248 out_free_ph:
1249 	kfree(elf_phdata);
1250 out:
1251 	return error;
1252 }
1253 #endif /* #ifdef CONFIG_USELIB */
1254 
1255 #ifdef CONFIG_ELF_CORE
1256 /*
1257  * ELF core dumper
1258  *
1259  * Modelled on fs/exec.c:aout_core_dump()
1260  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1261  */
1262 
1263 /*
1264  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1265  * that are useful for post-mortem analysis are included in every core dump.
1266  * In that way we ensure that the core dump is fully interpretable later
1267  * without matching up the same kernel and hardware config to see what PC values
1268  * meant. These special mappings include - vDSO, vsyscall, and other
1269  * architecture specific mappings
1270  */
1271 static bool always_dump_vma(struct vm_area_struct *vma)
1272 {
1273 	/* Any vsyscall mappings? */
1274 	if (vma == get_gate_vma(vma->vm_mm))
1275 		return true;
1276 
1277 	/*
1278 	 * Assume that all vmas with a .name op should always be dumped.
1279 	 * If this changes, a new vm_ops field can easily be added.
1280 	 */
1281 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1282 		return true;
1283 
1284 	/*
1285 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1286 	 * such as vDSO sections.
1287 	 */
1288 	if (arch_vma_name(vma))
1289 		return true;
1290 
1291 	return false;
1292 }
1293 
1294 /*
1295  * Decide what to dump of a segment, part, all or none.
1296  */
1297 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1298 				   unsigned long mm_flags)
1299 {
1300 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1301 
1302 	/* always dump the vdso and vsyscall sections */
1303 	if (always_dump_vma(vma))
1304 		goto whole;
1305 
1306 	if (vma->vm_flags & VM_DONTDUMP)
1307 		return 0;
1308 
1309 	/* support for DAX */
1310 	if (vma_is_dax(vma)) {
1311 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1312 			goto whole;
1313 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1314 			goto whole;
1315 		return 0;
1316 	}
1317 
1318 	/* Hugetlb memory check */
1319 	if (vma->vm_flags & VM_HUGETLB) {
1320 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1321 			goto whole;
1322 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1323 			goto whole;
1324 		return 0;
1325 	}
1326 
1327 	/* Do not dump I/O mapped devices or special mappings */
1328 	if (vma->vm_flags & VM_IO)
1329 		return 0;
1330 
1331 	/* By default, dump shared memory if mapped from an anonymous file. */
1332 	if (vma->vm_flags & VM_SHARED) {
1333 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1334 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1335 			goto whole;
1336 		return 0;
1337 	}
1338 
1339 	/* Dump segments that have been written to.  */
1340 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1341 		goto whole;
1342 	if (vma->vm_file == NULL)
1343 		return 0;
1344 
1345 	if (FILTER(MAPPED_PRIVATE))
1346 		goto whole;
1347 
1348 	/*
1349 	 * If this looks like the beginning of a DSO or executable mapping,
1350 	 * check for an ELF header.  If we find one, dump the first page to
1351 	 * aid in determining what was mapped here.
1352 	 */
1353 	if (FILTER(ELF_HEADERS) &&
1354 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1355 		u32 __user *header = (u32 __user *) vma->vm_start;
1356 		u32 word;
1357 		mm_segment_t fs = get_fs();
1358 		/*
1359 		 * Doing it this way gets the constant folded by GCC.
1360 		 */
1361 		union {
1362 			u32 cmp;
1363 			char elfmag[SELFMAG];
1364 		} magic;
1365 		BUILD_BUG_ON(SELFMAG != sizeof word);
1366 		magic.elfmag[EI_MAG0] = ELFMAG0;
1367 		magic.elfmag[EI_MAG1] = ELFMAG1;
1368 		magic.elfmag[EI_MAG2] = ELFMAG2;
1369 		magic.elfmag[EI_MAG3] = ELFMAG3;
1370 		/*
1371 		 * Switch to the user "segment" for get_user(),
1372 		 * then put back what elf_core_dump() had in place.
1373 		 */
1374 		set_fs(USER_DS);
1375 		if (unlikely(get_user(word, header)))
1376 			word = 0;
1377 		set_fs(fs);
1378 		if (word == magic.cmp)
1379 			return PAGE_SIZE;
1380 	}
1381 
1382 #undef	FILTER
1383 
1384 	return 0;
1385 
1386 whole:
1387 	return vma->vm_end - vma->vm_start;
1388 }
1389 
/* An ELF note held in memory until it is written to the core file. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* written out as n_type */
	unsigned int datasz;	/* bytes at @data, written out as n_descsz */
	void *data;		/* note payload */
};
1398 
1399 static int notesize(struct memelfnote *en)
1400 {
1401 	int sz;
1402 
1403 	sz = sizeof(struct elf_note);
1404 	sz += roundup(strlen(en->name) + 1, 4);
1405 	sz += roundup(en->datasz, 4);
1406 
1407 	return sz;
1408 }
1409 
1410 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1411 {
1412 	struct elf_note en;
1413 	en.n_namesz = strlen(men->name) + 1;
1414 	en.n_descsz = men->datasz;
1415 	en.n_type = men->type;
1416 
1417 	return dump_emit(cprm, &en, sizeof(en)) &&
1418 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1419 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1420 }
1421 
1422 static void fill_elf_header(struct elfhdr *elf, int segs,
1423 			    u16 machine, u32 flags)
1424 {
1425 	memset(elf, 0, sizeof(*elf));
1426 
1427 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1428 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1429 	elf->e_ident[EI_DATA] = ELF_DATA;
1430 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1431 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1432 
1433 	elf->e_type = ET_CORE;
1434 	elf->e_machine = machine;
1435 	elf->e_version = EV_CURRENT;
1436 	elf->e_phoff = sizeof(struct elfhdr);
1437 	elf->e_flags = flags;
1438 	elf->e_ehsize = sizeof(struct elfhdr);
1439 	elf->e_phentsize = sizeof(struct elf_phdr);
1440 	elf->e_phnum = segs;
1441 
1442 	return;
1443 }
1444 
1445 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1446 {
1447 	phdr->p_type = PT_NOTE;
1448 	phdr->p_offset = offset;
1449 	phdr->p_vaddr = 0;
1450 	phdr->p_paddr = 0;
1451 	phdr->p_filesz = sz;
1452 	phdr->p_memsz = 0;
1453 	phdr->p_flags = 0;
1454 	phdr->p_align = 0;
1455 	return;
1456 }
1457 
1458 static void fill_note(struct memelfnote *note, const char *name, int type,
1459 		unsigned int sz, void *data)
1460 {
1461 	note->name = name;
1462 	note->type = type;
1463 	note->datasz = sz;
1464 	note->data = data;
1465 	return;
1466 }
1467 
1468 /*
1469  * fill up all the fields in prstatus from the given task struct, except
1470  * registers which need to be filled up separately.
1471  */
1472 static void fill_prstatus(struct elf_prstatus *prstatus,
1473 		struct task_struct *p, long signr)
1474 {
1475 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1476 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1477 	prstatus->pr_sighold = p->blocked.sig[0];
1478 	rcu_read_lock();
1479 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1480 	rcu_read_unlock();
1481 	prstatus->pr_pid = task_pid_vnr(p);
1482 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1483 	prstatus->pr_sid = task_session_vnr(p);
1484 	if (thread_group_leader(p)) {
1485 		struct task_cputime cputime;
1486 
1487 		/*
1488 		 * This is the record for the group leader.  It shows the
1489 		 * group-wide total, not its individual thread total.
1490 		 */
1491 		thread_group_cputime(p, &cputime);
1492 		prstatus->pr_utime = ns_to_timeval(cputime.utime);
1493 		prstatus->pr_stime = ns_to_timeval(cputime.stime);
1494 	} else {
1495 		u64 utime, stime;
1496 
1497 		task_cputime(p, &utime, &stime);
1498 		prstatus->pr_utime = ns_to_timeval(utime);
1499 		prstatus->pr_stime = ns_to_timeval(stime);
1500 	}
1501 
1502 	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1503 	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1504 }
1505 
1506 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1507 		       struct mm_struct *mm)
1508 {
1509 	const struct cred *cred;
1510 	unsigned int i, len;
1511 
1512 	/* first copy the parameters from user space */
1513 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1514 
1515 	len = mm->arg_end - mm->arg_start;
1516 	if (len >= ELF_PRARGSZ)
1517 		len = ELF_PRARGSZ-1;
1518 	if (copy_from_user(&psinfo->pr_psargs,
1519 		           (const char __user *)mm->arg_start, len))
1520 		return -EFAULT;
1521 	for(i = 0; i < len; i++)
1522 		if (psinfo->pr_psargs[i] == 0)
1523 			psinfo->pr_psargs[i] = ' ';
1524 	psinfo->pr_psargs[len] = 0;
1525 
1526 	rcu_read_lock();
1527 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1528 	rcu_read_unlock();
1529 	psinfo->pr_pid = task_pid_vnr(p);
1530 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1531 	psinfo->pr_sid = task_session_vnr(p);
1532 
1533 	i = p->state ? ffz(~p->state) + 1 : 0;
1534 	psinfo->pr_state = i;
1535 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1536 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1537 	psinfo->pr_nice = task_nice(p);
1538 	psinfo->pr_flag = p->flags;
1539 	rcu_read_lock();
1540 	cred = __task_cred(p);
1541 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1542 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1543 	rcu_read_unlock();
1544 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1545 
1546 	return 0;
1547 }
1548 
1549 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1550 {
1551 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1552 	int i = 0;
1553 	do
1554 		i += 2;
1555 	while (auxv[i - 2] != AT_NULL);
1556 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1557 }
1558 
1559 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1560 		const siginfo_t *siginfo)
1561 {
1562 	mm_segment_t old_fs = get_fs();
1563 	set_fs(KERNEL_DS);
1564 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1565 	set_fs(old_fs);
1566 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1567 }
1568 
1569 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1570 /*
1571  * Format of NT_FILE note:
1572  *
1573  * long count     -- how many files are mapped
1574  * long page_size -- units for file_ofs
1575  * array of [COUNT] elements of
1576  *   long start
1577  *   long end
1578  *   long file_ofs
1579  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1580  */
1581 static int fill_files_note(struct memelfnote *note)
1582 {
1583 	struct vm_area_struct *vma;
1584 	unsigned count, size, names_ofs, remaining, n;
1585 	user_long_t *data;
1586 	user_long_t *start_end_ofs;
1587 	char *name_base, *name_curpos;
1588 
1589 	/* *Estimated* file count and total data size needed */
1590 	count = current->mm->map_count;
1591 	size = count * 64;
1592 
1593 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1594  alloc:
1595 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1596 		return -EINVAL;
1597 	size = round_up(size, PAGE_SIZE);
1598 	data = vmalloc(size);
1599 	if (!data)
1600 		return -ENOMEM;
1601 
1602 	start_end_ofs = data + 2;
1603 	name_base = name_curpos = ((char *)data) + names_ofs;
1604 	remaining = size - names_ofs;
1605 	count = 0;
1606 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1607 		struct file *file;
1608 		const char *filename;
1609 
1610 		file = vma->vm_file;
1611 		if (!file)
1612 			continue;
1613 		filename = file_path(file, name_curpos, remaining);
1614 		if (IS_ERR(filename)) {
1615 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1616 				vfree(data);
1617 				size = size * 5 / 4;
1618 				goto alloc;
1619 			}
1620 			continue;
1621 		}
1622 
1623 		/* file_path() fills at the end, move name down */
1624 		/* n = strlen(filename) + 1: */
1625 		n = (name_curpos + remaining) - filename;
1626 		remaining = filename - name_curpos;
1627 		memmove(name_curpos, filename, n);
1628 		name_curpos += n;
1629 
1630 		*start_end_ofs++ = vma->vm_start;
1631 		*start_end_ofs++ = vma->vm_end;
1632 		*start_end_ofs++ = vma->vm_pgoff;
1633 		count++;
1634 	}
1635 
1636 	/* Now we know exact count of files, can store it */
1637 	data[0] = count;
1638 	data[1] = PAGE_SIZE;
1639 	/*
1640 	 * Count usually is less than current->mm->map_count,
1641 	 * we need to move filenames down.
1642 	 */
1643 	n = current->mm->map_count - count;
1644 	if (n != 0) {
1645 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1646 		memmove(name_base - shift_bytes, name_base,
1647 			name_curpos - name_base);
1648 		name_curpos -= shift_bytes;
1649 	}
1650 
1651 	size = name_curpos - (char *)data;
1652 	fill_note(note, "CORE", NT_FILE, size, data);
1653 	return 0;
1654 }
1655 
1656 #ifdef CORE_DUMP_USE_REGSET
1657 #include <linux/regset.h>
1658 
1659 struct elf_thread_core_info {
1660 	struct elf_thread_core_info *next;
1661 	struct task_struct *task;
1662 	struct elf_prstatus prstatus;
1663 	struct memelfnote notes[0];
1664 };
1665 
1666 struct elf_note_info {
1667 	struct elf_thread_core_info *thread;
1668 	struct memelfnote psinfo;
1669 	struct memelfnote signote;
1670 	struct memelfnote auxv;
1671 	struct memelfnote files;
1672 	user_siginfo_t csigdata;
1673 	size_t size;
1674 	int thread_notes;
1675 };
1676 
1677 /*
1678  * When a regset has a writeback hook, we call it on each thread before
1679  * dumping user memory.  On register window machines, this makes sure the
1680  * user memory backing the register data is up to date before we read it.
1681  */
1682 static void do_thread_regset_writeback(struct task_struct *task,
1683 				       const struct user_regset *regset)
1684 {
1685 	if (regset->writeback)
1686 		regset->writeback(task, regset, 1);
1687 }
1688 
1689 #ifndef PRSTATUS_SIZE
1690 #define PRSTATUS_SIZE(S, R) sizeof(S)
1691 #endif
1692 
1693 #ifndef SET_PR_FPVALID
1694 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1695 #endif
1696 
1697 static int fill_thread_core_info(struct elf_thread_core_info *t,
1698 				 const struct user_regset_view *view,
1699 				 long signr, size_t *total)
1700 {
1701 	unsigned int i;
1702 	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1703 
1704 	/*
1705 	 * NT_PRSTATUS is the one special case, because the regset data
1706 	 * goes into the pr_reg field inside the note contents, rather
1707 	 * than being the whole note contents.  We fill the reset in here.
1708 	 * We assume that regset 0 is NT_PRSTATUS.
1709 	 */
1710 	fill_prstatus(&t->prstatus, t->task, signr);
1711 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1712 				    &t->prstatus.pr_reg, NULL);
1713 
1714 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1715 		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1716 	*total += notesize(&t->notes[0]);
1717 
1718 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1719 
1720 	/*
1721 	 * Each other regset might generate a note too.  For each regset
1722 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1723 	 * all zero and we'll know to skip writing it later.
1724 	 */
1725 	for (i = 1; i < view->n; ++i) {
1726 		const struct user_regset *regset = &view->regsets[i];
1727 		do_thread_regset_writeback(t->task, regset);
1728 		if (regset->core_note_type && regset->get &&
1729 		    (!regset->active || regset->active(t->task, regset))) {
1730 			int ret;
1731 			size_t size = regset->n * regset->size;
1732 			void *data = kmalloc(size, GFP_KERNEL);
1733 			if (unlikely(!data))
1734 				return 0;
1735 			ret = regset->get(t->task, regset,
1736 					  0, size, data, NULL);
1737 			if (unlikely(ret))
1738 				kfree(data);
1739 			else {
1740 				if (regset->core_note_type != NT_PRFPREG)
1741 					fill_note(&t->notes[i], "LINUX",
1742 						  regset->core_note_type,
1743 						  size, data);
1744 				else {
1745 					SET_PR_FPVALID(&t->prstatus,
1746 							1, regset_size);
1747 					fill_note(&t->notes[i], "CORE",
1748 						  NT_PRFPREG, size, data);
1749 				}
1750 				*total += notesize(&t->notes[i]);
1751 			}
1752 		}
1753 	}
1754 
1755 	return 1;
1756 }
1757 
1758 static int fill_note_info(struct elfhdr *elf, int phdrs,
1759 			  struct elf_note_info *info,
1760 			  const siginfo_t *siginfo, struct pt_regs *regs)
1761 {
1762 	struct task_struct *dump_task = current;
1763 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1764 	struct elf_thread_core_info *t;
1765 	struct elf_prpsinfo *psinfo;
1766 	struct core_thread *ct;
1767 	unsigned int i;
1768 
1769 	info->size = 0;
1770 	info->thread = NULL;
1771 
1772 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1773 	if (psinfo == NULL) {
1774 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1775 		return 0;
1776 	}
1777 
1778 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1779 
1780 	/*
1781 	 * Figure out how many notes we're going to need for each thread.
1782 	 */
1783 	info->thread_notes = 0;
1784 	for (i = 0; i < view->n; ++i)
1785 		if (view->regsets[i].core_note_type != 0)
1786 			++info->thread_notes;
1787 
1788 	/*
1789 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1790 	 * since it is our one special case.
1791 	 */
1792 	if (unlikely(info->thread_notes == 0) ||
1793 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1794 		WARN_ON(1);
1795 		return 0;
1796 	}
1797 
1798 	/*
1799 	 * Initialize the ELF file header.
1800 	 */
1801 	fill_elf_header(elf, phdrs,
1802 			view->e_machine, view->e_flags);
1803 
1804 	/*
1805 	 * Allocate a structure for each thread.
1806 	 */
1807 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1808 		t = kzalloc(offsetof(struct elf_thread_core_info,
1809 				     notes[info->thread_notes]),
1810 			    GFP_KERNEL);
1811 		if (unlikely(!t))
1812 			return 0;
1813 
1814 		t->task = ct->task;
1815 		if (ct->task == dump_task || !info->thread) {
1816 			t->next = info->thread;
1817 			info->thread = t;
1818 		} else {
1819 			/*
1820 			 * Make sure to keep the original task at
1821 			 * the head of the list.
1822 			 */
1823 			t->next = info->thread->next;
1824 			info->thread->next = t;
1825 		}
1826 	}
1827 
1828 	/*
1829 	 * Now fill in each thread's information.
1830 	 */
1831 	for (t = info->thread; t != NULL; t = t->next)
1832 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1833 			return 0;
1834 
1835 	/*
1836 	 * Fill in the two process-wide notes.
1837 	 */
1838 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1839 	info->size += notesize(&info->psinfo);
1840 
1841 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1842 	info->size += notesize(&info->signote);
1843 
1844 	fill_auxv_note(&info->auxv, current->mm);
1845 	info->size += notesize(&info->auxv);
1846 
1847 	if (fill_files_note(&info->files) == 0)
1848 		info->size += notesize(&info->files);
1849 
1850 	return 1;
1851 }
1852 
1853 static size_t get_note_info_size(struct elf_note_info *info)
1854 {
1855 	return info->size;
1856 }
1857 
1858 /*
1859  * Write all the notes for each thread.  When writing the first thread, the
1860  * process-wide notes are interleaved after the first thread-specific note.
1861  */
1862 static int write_note_info(struct elf_note_info *info,
1863 			   struct coredump_params *cprm)
1864 {
1865 	bool first = true;
1866 	struct elf_thread_core_info *t = info->thread;
1867 
1868 	do {
1869 		int i;
1870 
1871 		if (!writenote(&t->notes[0], cprm))
1872 			return 0;
1873 
1874 		if (first && !writenote(&info->psinfo, cprm))
1875 			return 0;
1876 		if (first && !writenote(&info->signote, cprm))
1877 			return 0;
1878 		if (first && !writenote(&info->auxv, cprm))
1879 			return 0;
1880 		if (first && info->files.data &&
1881 				!writenote(&info->files, cprm))
1882 			return 0;
1883 
1884 		for (i = 1; i < info->thread_notes; ++i)
1885 			if (t->notes[i].data &&
1886 			    !writenote(&t->notes[i], cprm))
1887 				return 0;
1888 
1889 		first = false;
1890 		t = t->next;
1891 	} while (t);
1892 
1893 	return 1;
1894 }
1895 
1896 static void free_note_info(struct elf_note_info *info)
1897 {
1898 	struct elf_thread_core_info *threads = info->thread;
1899 	while (threads) {
1900 		unsigned int i;
1901 		struct elf_thread_core_info *t = threads;
1902 		threads = t->next;
1903 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1904 		for (i = 1; i < info->thread_notes; ++i)
1905 			kfree(t->notes[i].data);
1906 		kfree(t);
1907 	}
1908 	kfree(info->psinfo.data);
1909 	vfree(info->files.data);
1910 }
1911 
1912 #else
1913 
1914 /* Here is the structure in which status of each thread is captured. */
1915 struct elf_thread_status
1916 {
1917 	struct list_head list;
1918 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1919 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1920 	struct task_struct *thread;
1921 #ifdef ELF_CORE_COPY_XFPREGS
1922 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1923 #endif
1924 	struct memelfnote notes[3];
1925 	int num_notes;
1926 };
1927 
1928 /*
1929  * In order to add the specific thread information for the elf file format,
1930  * we need to keep a linked list of every threads pr_status and then create
1931  * a single section for them in the final core file.
1932  */
1933 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1934 {
1935 	int sz = 0;
1936 	struct task_struct *p = t->thread;
1937 	t->num_notes = 0;
1938 
1939 	fill_prstatus(&t->prstatus, p, signr);
1940 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1941 
1942 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1943 		  &(t->prstatus));
1944 	t->num_notes++;
1945 	sz += notesize(&t->notes[0]);
1946 
1947 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1948 								&t->fpu))) {
1949 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1950 			  &(t->fpu));
1951 		t->num_notes++;
1952 		sz += notesize(&t->notes[1]);
1953 	}
1954 
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1957 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1958 			  sizeof(t->xfpu), &t->xfpu);
1959 		t->num_notes++;
1960 		sz += notesize(&t->notes[2]);
1961 	}
1962 #endif
1963 	return sz;
1964 }
1965 
1966 struct elf_note_info {
1967 	struct memelfnote *notes;
1968 	struct memelfnote *notes_files;
1969 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1970 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1971 	struct list_head thread_list;
1972 	elf_fpregset_t *fpu;
1973 #ifdef ELF_CORE_COPY_XFPREGS
1974 	elf_fpxregset_t *xfpu;
1975 #endif
1976 	user_siginfo_t csigdata;
1977 	int thread_status_size;
1978 	int numnote;
1979 };
1980 
1981 static int elf_note_info_init(struct elf_note_info *info)
1982 {
1983 	memset(info, 0, sizeof(*info));
1984 	INIT_LIST_HEAD(&info->thread_list);
1985 
1986 	/* Allocate space for ELF notes */
1987 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1988 	if (!info->notes)
1989 		return 0;
1990 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1991 	if (!info->psinfo)
1992 		return 0;
1993 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1994 	if (!info->prstatus)
1995 		return 0;
1996 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1997 	if (!info->fpu)
1998 		return 0;
1999 #ifdef ELF_CORE_COPY_XFPREGS
2000 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2001 	if (!info->xfpu)
2002 		return 0;
2003 #endif
2004 	return 1;
2005 }
2006 
2007 static int fill_note_info(struct elfhdr *elf, int phdrs,
2008 			  struct elf_note_info *info,
2009 			  const siginfo_t *siginfo, struct pt_regs *regs)
2010 {
2011 	struct list_head *t;
2012 	struct core_thread *ct;
2013 	struct elf_thread_status *ets;
2014 
2015 	if (!elf_note_info_init(info))
2016 		return 0;
2017 
2018 	for (ct = current->mm->core_state->dumper.next;
2019 					ct; ct = ct->next) {
2020 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2021 		if (!ets)
2022 			return 0;
2023 
2024 		ets->thread = ct->task;
2025 		list_add(&ets->list, &info->thread_list);
2026 	}
2027 
2028 	list_for_each(t, &info->thread_list) {
2029 		int sz;
2030 
2031 		ets = list_entry(t, struct elf_thread_status, list);
2032 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2033 		info->thread_status_size += sz;
2034 	}
2035 	/* now collect the dump for the current */
2036 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2037 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2038 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2039 
2040 	/* Set up header */
2041 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2042 
2043 	/*
2044 	 * Set up the notes in similar form to SVR4 core dumps made
2045 	 * with info from their /proc.
2046 	 */
2047 
2048 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2049 		  sizeof(*info->prstatus), info->prstatus);
2050 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2051 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2052 		  sizeof(*info->psinfo), info->psinfo);
2053 
2054 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2055 	fill_auxv_note(info->notes + 3, current->mm);
2056 	info->numnote = 4;
2057 
2058 	if (fill_files_note(info->notes + info->numnote) == 0) {
2059 		info->notes_files = info->notes + info->numnote;
2060 		info->numnote++;
2061 	}
2062 
2063 	/* Try to dump the FPU. */
2064 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2065 							       info->fpu);
2066 	if (info->prstatus->pr_fpvalid)
2067 		fill_note(info->notes + info->numnote++,
2068 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2069 #ifdef ELF_CORE_COPY_XFPREGS
2070 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2071 		fill_note(info->notes + info->numnote++,
2072 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2073 			  sizeof(*info->xfpu), info->xfpu);
2074 #endif
2075 
2076 	return 1;
2077 }
2078 
2079 static size_t get_note_info_size(struct elf_note_info *info)
2080 {
2081 	int sz = 0;
2082 	int i;
2083 
2084 	for (i = 0; i < info->numnote; i++)
2085 		sz += notesize(info->notes + i);
2086 
2087 	sz += info->thread_status_size;
2088 
2089 	return sz;
2090 }
2091 
2092 static int write_note_info(struct elf_note_info *info,
2093 			   struct coredump_params *cprm)
2094 {
2095 	int i;
2096 	struct list_head *t;
2097 
2098 	for (i = 0; i < info->numnote; i++)
2099 		if (!writenote(info->notes + i, cprm))
2100 			return 0;
2101 
2102 	/* write out the thread status notes section */
2103 	list_for_each(t, &info->thread_list) {
2104 		struct elf_thread_status *tmp =
2105 				list_entry(t, struct elf_thread_status, list);
2106 
2107 		for (i = 0; i < tmp->num_notes; i++)
2108 			if (!writenote(&tmp->notes[i], cprm))
2109 				return 0;
2110 	}
2111 
2112 	return 1;
2113 }
2114 
2115 static void free_note_info(struct elf_note_info *info)
2116 {
2117 	while (!list_empty(&info->thread_list)) {
2118 		struct list_head *tmp = info->thread_list.next;
2119 		list_del(tmp);
2120 		kfree(list_entry(tmp, struct elf_thread_status, list));
2121 	}
2122 
2123 	/* Free data possibly allocated by fill_files_note(): */
2124 	if (info->notes_files)
2125 		vfree(info->notes_files->data);
2126 
2127 	kfree(info->prstatus);
2128 	kfree(info->psinfo);
2129 	kfree(info->notes);
2130 	kfree(info->fpu);
2131 #ifdef ELF_CORE_COPY_XFPREGS
2132 	kfree(info->xfpu);
2133 #endif
2134 }
2135 
2136 #endif
2137 
2138 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2139 					struct vm_area_struct *gate_vma)
2140 {
2141 	struct vm_area_struct *ret = tsk->mm->mmap;
2142 
2143 	if (ret)
2144 		return ret;
2145 	return gate_vma;
2146 }
2147 /*
2148  * Helper function for iterating across a vma list.  It ensures that the caller
2149  * will visit `gate_vma' prior to terminating the search.
2150  */
2151 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2152 					struct vm_area_struct *gate_vma)
2153 {
2154 	struct vm_area_struct *ret;
2155 
2156 	ret = this_vma->vm_next;
2157 	if (ret)
2158 		return ret;
2159 	if (this_vma == gate_vma)
2160 		return NULL;
2161 	return gate_vma;
2162 }
2163 
2164 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2165 			     elf_addr_t e_shoff, int segs)
2166 {
2167 	elf->e_shoff = e_shoff;
2168 	elf->e_shentsize = sizeof(*shdr4extnum);
2169 	elf->e_shnum = 1;
2170 	elf->e_shstrndx = SHN_UNDEF;
2171 
2172 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2173 
2174 	shdr4extnum->sh_type = SHT_NULL;
2175 	shdr4extnum->sh_size = elf->e_shnum;
2176 	shdr4extnum->sh_link = elf->e_shstrndx;
2177 	shdr4extnum->sh_info = segs;
2178 }
2179 
2180 /*
2181  * Actual dumper
2182  *
2183  * This is a two-pass process; first we find the offsets of the bits,
2184  * and then they are actually written out.  If we run out of core limit
2185  * we just truncate.
2186  */
2187 static int elf_core_dump(struct coredump_params *cprm)
2188 {
2189 	int has_dumped = 0;
2190 	mm_segment_t fs;
2191 	int segs, i;
2192 	size_t vma_data_size = 0;
2193 	struct vm_area_struct *vma, *gate_vma;
2194 	struct elfhdr *elf = NULL;
2195 	loff_t offset = 0, dataoff;
2196 	struct elf_note_info info = { };
2197 	struct elf_phdr *phdr4note = NULL;
2198 	struct elf_shdr *shdr4extnum = NULL;
2199 	Elf_Half e_phnum;
2200 	elf_addr_t e_shoff;
2201 	elf_addr_t *vma_filesz = NULL;
2202 
2203 	/*
2204 	 * We no longer stop all VM operations.
2205 	 *
2206 	 * This is because those proceses that could possibly change map_count
2207 	 * or the mmap / vma pages are now blocked in do_exit on current
2208 	 * finishing this core dump.
2209 	 *
2210 	 * Only ptrace can touch these memory addresses, but it doesn't change
2211 	 * the map_count or the pages allocated. So no possibility of crashing
2212 	 * exists while dumping the mm->vm_next areas to the core file.
2213 	 */
2214 
2215 	/* alloc memory for large data structures: too large to be on stack */
2216 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2217 	if (!elf)
2218 		goto out;
2219 	/*
2220 	 * The number of segs are recored into ELF header as 16bit value.
2221 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2222 	 */
2223 	segs = current->mm->map_count;
2224 	segs += elf_core_extra_phdrs();
2225 
2226 	gate_vma = get_gate_vma(current->mm);
2227 	if (gate_vma != NULL)
2228 		segs++;
2229 
2230 	/* for notes section */
2231 	segs++;
2232 
2233 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2234 	 * this, kernel supports extended numbering. Have a look at
2235 	 * include/linux/elf.h for further information. */
2236 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2237 
2238 	/*
2239 	 * Collect all the non-memory information about the process for the
2240 	 * notes.  This also sets up the file header.
2241 	 */
2242 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2243 		goto cleanup;
2244 
2245 	has_dumped = 1;
2246 
2247 	fs = get_fs();
2248 	set_fs(KERNEL_DS);
2249 
2250 	offset += sizeof(*elf);				/* Elf header */
2251 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2252 
2253 	/* Write notes phdr entry */
2254 	{
2255 		size_t sz = get_note_info_size(&info);
2256 
2257 		sz += elf_coredump_extra_notes_size();
2258 
2259 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2260 		if (!phdr4note)
2261 			goto end_coredump;
2262 
2263 		fill_elf_note_phdr(phdr4note, sz, offset);
2264 		offset += sz;
2265 	}
2266 
2267 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2268 
2269 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2270 		goto end_coredump;
2271 	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2272 	if (!vma_filesz)
2273 		goto end_coredump;
2274 
2275 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2276 			vma = next_vma(vma, gate_vma)) {
2277 		unsigned long dump_size;
2278 
2279 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2280 		vma_filesz[i++] = dump_size;
2281 		vma_data_size += dump_size;
2282 	}
2283 
2284 	offset += vma_data_size;
2285 	offset += elf_core_extra_data_size();
2286 	e_shoff = offset;
2287 
2288 	if (e_phnum == PN_XNUM) {
2289 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2290 		if (!shdr4extnum)
2291 			goto end_coredump;
2292 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2293 	}
2294 
2295 	offset = dataoff;
2296 
2297 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2298 		goto end_coredump;
2299 
2300 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2301 		goto end_coredump;
2302 
2303 	/* Write program headers for segments dump */
2304 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2305 			vma = next_vma(vma, gate_vma)) {
2306 		struct elf_phdr phdr;
2307 
2308 		phdr.p_type = PT_LOAD;
2309 		phdr.p_offset = offset;
2310 		phdr.p_vaddr = vma->vm_start;
2311 		phdr.p_paddr = 0;
2312 		phdr.p_filesz = vma_filesz[i++];
2313 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2314 		offset += phdr.p_filesz;
2315 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2316 		if (vma->vm_flags & VM_WRITE)
2317 			phdr.p_flags |= PF_W;
2318 		if (vma->vm_flags & VM_EXEC)
2319 			phdr.p_flags |= PF_X;
2320 		phdr.p_align = ELF_EXEC_PAGESIZE;
2321 
2322 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2323 			goto end_coredump;
2324 	}
2325 
2326 	if (!elf_core_write_extra_phdrs(cprm, offset))
2327 		goto end_coredump;
2328 
2329  	/* write out the notes section */
2330 	if (!write_note_info(&info, cprm))
2331 		goto end_coredump;
2332 
2333 	if (elf_coredump_extra_notes_write(cprm))
2334 		goto end_coredump;
2335 
2336 	/* Align to page */
2337 	if (!dump_skip(cprm, dataoff - cprm->pos))
2338 		goto end_coredump;
2339 
2340 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2341 			vma = next_vma(vma, gate_vma)) {
2342 		unsigned long addr;
2343 		unsigned long end;
2344 
2345 		end = vma->vm_start + vma_filesz[i++];
2346 
2347 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2348 			struct page *page;
2349 			int stop;
2350 
2351 			page = get_dump_page(addr);
2352 			if (page) {
2353 				void *kaddr = kmap(page);
2354 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2355 				kunmap(page);
2356 				put_page(page);
2357 			} else
2358 				stop = !dump_skip(cprm, PAGE_SIZE);
2359 			if (stop)
2360 				goto end_coredump;
2361 		}
2362 	}
2363 	dump_truncate(cprm);
2364 
2365 	if (!elf_core_write_extra_data(cprm))
2366 		goto end_coredump;
2367 
2368 	if (e_phnum == PN_XNUM) {
2369 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2370 			goto end_coredump;
2371 	}
2372 
2373 end_coredump:
2374 	set_fs(fs);
2375 
2376 cleanup:
2377 	free_note_info(&info);
2378 	kfree(shdr4extnum);
2379 	vfree(vma_filesz);
2380 	kfree(phdr4note);
2381 	kfree(elf);
2382 out:
2383 	return has_dumped;
2384 }
2385 
2386 #endif		/* CONFIG_ELF_CORE */
2387 
2388 static int __init init_elf_binfmt(void)
2389 {
2390 	register_binfmt(&elf_format);
2391 	return 0;
2392 }
2393 
2394 static void __exit exit_elf_binfmt(void)
2395 {
2396 	/* Remove the COFF and ELF loaders. */
2397 	unregister_binfmt(&elf_format);
2398 }
2399 
2400 core_initcall(init_elf_binfmt);
2401 module_exit(exit_elf_binfmt);
2402 MODULE_LICENSE("GPL");
2403