xref: /openbmc/linux/fs/binfmt_elf.c (revision 6a613ac6)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42 
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49 
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 				int, int, unsigned long);
53 
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59 
60 /*
61  * If we don't support core dumping, then supply a NULL so we
62  * don't even try.
63  */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump	NULL
68 #endif
69 
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN	PAGE_SIZE
74 #endif
75 
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS	0
78 #endif
79 
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
83 
84 static struct linux_binfmt elf_format = {
85 	.module		= THIS_MODULE,
86 	.load_binary	= load_elf_binary,
87 	.load_shlib	= load_elf_library,
88 	.core_dump	= elf_core_dump,
89 	.min_coredump	= ELF_EXEC_PAGESIZE,
90 };
91 
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93 
94 static int set_brk(unsigned long start, unsigned long end)
95 {
96 	start = ELF_PAGEALIGN(start);
97 	end = ELF_PAGEALIGN(end);
98 	if (end > start) {
99 		unsigned long addr;
100 		addr = vm_brk(start, end - start);
101 		if (BAD_ADDR(addr))
102 			return addr;
103 	}
104 	current->mm->start_brk = current->mm->brk = end;
105 	return 0;
106 }
107 
108 /* We need to explicitly zero any fractional pages
109    after the data section (i.e. bss).  This would
110    contain the junk from the file that should not
111    be in memory
112  */
113 static int padzero(unsigned long elf_bss)
114 {
115 	unsigned long nbyte;
116 
117 	nbyte = ELF_PAGEOFFSET(elf_bss);
118 	if (nbyte) {
119 		nbyte = ELF_MIN_ALIGN - nbyte;
120 		if (clear_user((void __user *) elf_bss, nbyte))
121 			return -EFAULT;
122 	}
123 	return 0;
124 }
125 
126 /* Let's use some macros to make this stack manipulation a little clearer */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 	old_sp; })
134 #else
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 	(((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
139 #endif
140 
141 #ifndef ELF_BASE_PLATFORM
142 /*
143  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145  * will be copied to the user stack in the same manner as AT_PLATFORM.
146  */
147 #define ELF_BASE_PLATFORM NULL
148 #endif
149 
150 static int
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 		unsigned long load_addr, unsigned long interp_load_addr)
153 {
154 	unsigned long p = bprm->p;
155 	int argc = bprm->argc;
156 	int envc = bprm->envc;
157 	elf_addr_t __user *argv;
158 	elf_addr_t __user *envp;
159 	elf_addr_t __user *sp;
160 	elf_addr_t __user *u_platform;
161 	elf_addr_t __user *u_base_platform;
162 	elf_addr_t __user *u_rand_bytes;
163 	const char *k_platform = ELF_PLATFORM;
164 	const char *k_base_platform = ELF_BASE_PLATFORM;
165 	unsigned char k_rand_bytes[16];
166 	int items;
167 	elf_addr_t *elf_info;
168 	int ei_index = 0;
169 	const struct cred *cred = current_cred();
170 	struct vm_area_struct *vma;
171 
172 	/*
173 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 	 * evictions by the processes running on the same package. One
175 	 * thing we can do is to shuffle the initial stack for them.
176 	 */
177 
178 	p = arch_align_stack(p);
179 
180 	/*
181 	 * If this architecture has a platform capability string, copy it
182 	 * to userspace.  In some cases (Sparc), this info is impossible
183 	 * for userspace to get any other way, in others (i386) it is
184 	 * merely difficult.
185 	 */
186 	u_platform = NULL;
187 	if (k_platform) {
188 		size_t len = strlen(k_platform) + 1;
189 
190 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 		if (__copy_to_user(u_platform, k_platform, len))
192 			return -EFAULT;
193 	}
194 
195 	/*
196 	 * If this architecture has a "base" platform capability
197 	 * string, copy it to userspace.
198 	 */
199 	u_base_platform = NULL;
200 	if (k_base_platform) {
201 		size_t len = strlen(k_base_platform) + 1;
202 
203 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 		if (__copy_to_user(u_base_platform, k_base_platform, len))
205 			return -EFAULT;
206 	}
207 
208 	/*
209 	 * Generate 16 random bytes for userspace PRNG seeding.
210 	 */
211 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 	u_rand_bytes = (elf_addr_t __user *)
213 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
214 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 		return -EFAULT;
216 
217 	/* Create the ELF interpreter info */
218 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220 #define NEW_AUX_ENT(id, val) \
221 	do { \
222 		elf_info[ei_index++] = id; \
223 		elf_info[ei_index++] = val; \
224 	} while (0)
225 
226 #ifdef ARCH_DLINFO
227 	/*
228 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 	 * AUXV.
230 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 	 * ARCH_DLINFO changes
232 	 */
233 	ARCH_DLINFO;
234 #endif
235 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 	NEW_AUX_ENT(AT_FLAGS, 0);
243 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 #ifdef ELF_HWCAP2
251 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 #endif
253 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 	if (k_platform) {
255 		NEW_AUX_ENT(AT_PLATFORM,
256 			    (elf_addr_t)(unsigned long)u_platform);
257 	}
258 	if (k_base_platform) {
259 		NEW_AUX_ENT(AT_BASE_PLATFORM,
260 			    (elf_addr_t)(unsigned long)u_base_platform);
261 	}
262 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 	}
265 #undef NEW_AUX_ENT
266 	/* AT_NULL is zero; clear the rest too */
267 	memset(&elf_info[ei_index], 0,
268 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269 
270 	/* And advance past the AT_NULL entry.  */
271 	ei_index += 2;
272 
273 	sp = STACK_ADD(p, ei_index);
274 
275 	items = (argc + 1) + (envc + 1) + 1;
276 	bprm->p = STACK_ROUND(sp, items);
277 
278 	/* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 #else
283 	sp = (elf_addr_t __user *)bprm->p;
284 #endif
285 
286 
287 	/*
288 	 * Grow the stack manually; some architectures have a limit on how
289 	 * far ahead a user-space access may be in order to grow the stack.
290 	 */
291 	vma = find_extend_vma(current->mm, bprm->p);
292 	if (!vma)
293 		return -EFAULT;
294 
295 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 	if (__put_user(argc, sp++))
297 		return -EFAULT;
298 	argv = sp;
299 	envp = argv + argc + 1;
300 
301 	/* Populate argv and envp */
302 	p = current->mm->arg_end = current->mm->arg_start;
303 	while (argc-- > 0) {
304 		size_t len;
305 		if (__put_user((elf_addr_t)p, argv++))
306 			return -EFAULT;
307 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 		if (!len || len > MAX_ARG_STRLEN)
309 			return -EINVAL;
310 		p += len;
311 	}
312 	if (__put_user(0, argv))
313 		return -EFAULT;
314 	current->mm->arg_end = current->mm->env_start = p;
315 	while (envc-- > 0) {
316 		size_t len;
317 		if (__put_user((elf_addr_t)p, envp++))
318 			return -EFAULT;
319 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 		if (!len || len > MAX_ARG_STRLEN)
321 			return -EINVAL;
322 		p += len;
323 	}
324 	if (__put_user(0, envp))
325 		return -EFAULT;
326 	current->mm->env_end = p;
327 
328 	/* Put the elf_info on the stack in the right place.  */
329 	sp = (elf_addr_t __user *)envp + 1;
330 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331 		return -EFAULT;
332 	return 0;
333 }
334 
335 #ifndef elf_map
336 
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 		struct elf_phdr *eppnt, int prot, int type,
339 		unsigned long total_size)
340 {
341 	unsigned long map_addr;
342 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 	addr = ELF_PAGESTART(addr);
345 	size = ELF_PAGEALIGN(size);
346 
347 	/* mmap() will return -EINVAL if given a zero size, but a
348 	 * segment with zero filesize is perfectly valid */
349 	if (!size)
350 		return addr;
351 
352 	/*
353 	* total_size is the size of the ELF (interpreter) image.
354 	* The _first_ mmap needs to know the full size, otherwise
355 	* randomization might put this image into an overlapping
356 	* position with the ELF binary image. (since size < total_size)
357 	* So we first map the 'big' image - and unmap the remainder at
358 	* the end. (which unmap is needed for ELF images with holes.)
359 	*/
360 	if (total_size) {
361 		total_size = ELF_PAGEALIGN(total_size);
362 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 		if (!BAD_ADDR(map_addr))
364 			vm_munmap(map_addr+size, total_size-size);
365 	} else
366 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
367 
368 	return(map_addr);
369 }
370 
371 #endif /* !elf_map */
372 
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 {
375 	int i, first_idx = -1, last_idx = -1;
376 
377 	for (i = 0; i < nr; i++) {
378 		if (cmds[i].p_type == PT_LOAD) {
379 			last_idx = i;
380 			if (first_idx == -1)
381 				first_idx = i;
382 		}
383 	}
384 	if (first_idx == -1)
385 		return 0;
386 
387 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
389 }
390 
391 /**
392  * load_elf_phdrs() - load ELF program headers
393  * @elf_ex:   ELF header of the binary whose program headers should be loaded
394  * @elf_file: the opened ELF binary file
395  *
396  * Loads ELF program headers from the binary file elf_file, which has the ELF
397  * header pointed to by elf_ex, into a newly allocated array. The caller is
398  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399  */
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 				       struct file *elf_file)
402 {
403 	struct elf_phdr *elf_phdata = NULL;
404 	int retval, size, err = -1;
405 
406 	/*
407 	 * If the size of this structure has changed, then punt, since
408 	 * we will be doing the wrong thing.
409 	 */
410 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411 		goto out;
412 
413 	/* Sanity check the number of program headers... */
414 	if (elf_ex->e_phnum < 1 ||
415 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416 		goto out;
417 
418 	/* ...and their total size. */
419 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 	if (size > ELF_MIN_ALIGN)
421 		goto out;
422 
423 	elf_phdata = kmalloc(size, GFP_KERNEL);
424 	if (!elf_phdata)
425 		goto out;
426 
427 	/* Read in the program headers */
428 	retval = kernel_read(elf_file, elf_ex->e_phoff,
429 			     (char *)elf_phdata, size);
430 	if (retval != size) {
431 		err = (retval < 0) ? retval : -EIO;
432 		goto out;
433 	}
434 
435 	/* Success! */
436 	err = 0;
437 out:
438 	if (err) {
439 		kfree(elf_phdata);
440 		elf_phdata = NULL;
441 	}
442 	return elf_phdata;
443 }
444 
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
446 
447 /**
448  * struct arch_elf_state - arch-specific ELF loading state
449  *
450  * This structure is used to preserve architecture specific data during
451  * the loading of an ELF file, throughout the checking of architecture
452  * specific ELF headers & through to the point where the ELF load is
453  * known to be proceeding (ie. SET_PERSONALITY).
454  *
455  * This implementation is a dummy for architectures which require no
456  * specific state.
457  */
458 struct arch_elf_state {
459 };
460 
461 #define INIT_ARCH_ELF_STATE {}
462 
463 /**
464  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465  * @ehdr:	The main ELF header
466  * @phdr:	The program header to check
467  * @elf:	The open ELF file
468  * @is_interp:	True if the phdr is from the interpreter of the ELF being
469  *		loaded, else false.
470  * @state:	Architecture-specific state preserved throughout the process
471  *		of loading the ELF.
472  *
473  * Inspects the program header phdr to validate its correctness and/or
474  * suitability for the system. Called once per ELF program header in the
475  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
476  * interpreter.
477  *
478  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479  *         with that return code.
480  */
481 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482 				   struct elf_phdr *phdr,
483 				   struct file *elf, bool is_interp,
484 				   struct arch_elf_state *state)
485 {
486 	/* Dummy implementation, always proceed */
487 	return 0;
488 }
489 
490 /**
491  * arch_check_elf() - check an ELF executable
492  * @ehdr:	The main ELF header
493  * @has_interp:	True if the ELF has an interpreter, else false.
494  * @state:	Architecture-specific state preserved throughout the process
495  *		of loading the ELF.
496  *
497  * Provides a final opportunity for architecture code to reject the loading
498  * of the ELF & cause an exec syscall to return an error. This is called after
499  * all program headers to be checked by arch_elf_pt_proc have been.
500  *
501  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
502  *         with that return code.
503  */
504 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
505 				 struct arch_elf_state *state)
506 {
507 	/* Dummy implementation, always proceed */
508 	return 0;
509 }
510 
511 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
512 
513 /* This is much more generalized than the library routine read function,
514    so we keep this separate.  Technically the library read function
515    is only provided so that we can read a.out libraries that have
516    an ELF header */
517 
518 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
519 		struct file *interpreter, unsigned long *interp_map_addr,
520 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
521 {
522 	struct elf_phdr *eppnt;
523 	unsigned long load_addr = 0;
524 	int load_addr_set = 0;
525 	unsigned long last_bss = 0, elf_bss = 0;
526 	unsigned long error = ~0UL;
527 	unsigned long total_size;
528 	int i;
529 
530 	/* First of all, some simple consistency checks */
531 	if (interp_elf_ex->e_type != ET_EXEC &&
532 	    interp_elf_ex->e_type != ET_DYN)
533 		goto out;
534 	if (!elf_check_arch(interp_elf_ex))
535 		goto out;
536 	if (!interpreter->f_op->mmap)
537 		goto out;
538 
539 	total_size = total_mapping_size(interp_elf_phdata,
540 					interp_elf_ex->e_phnum);
541 	if (!total_size) {
542 		error = -EINVAL;
543 		goto out;
544 	}
545 
546 	eppnt = interp_elf_phdata;
547 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
548 		if (eppnt->p_type == PT_LOAD) {
549 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
550 			int elf_prot = 0;
551 			unsigned long vaddr = 0;
552 			unsigned long k, map_addr;
553 
554 			if (eppnt->p_flags & PF_R)
555 		    		elf_prot = PROT_READ;
556 			if (eppnt->p_flags & PF_W)
557 				elf_prot |= PROT_WRITE;
558 			if (eppnt->p_flags & PF_X)
559 				elf_prot |= PROT_EXEC;
560 			vaddr = eppnt->p_vaddr;
561 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
562 				elf_type |= MAP_FIXED;
563 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
564 				load_addr = -vaddr;
565 
566 			map_addr = elf_map(interpreter, load_addr + vaddr,
567 					eppnt, elf_prot, elf_type, total_size);
568 			total_size = 0;
569 			if (!*interp_map_addr)
570 				*interp_map_addr = map_addr;
571 			error = map_addr;
572 			if (BAD_ADDR(map_addr))
573 				goto out;
574 
575 			if (!load_addr_set &&
576 			    interp_elf_ex->e_type == ET_DYN) {
577 				load_addr = map_addr - ELF_PAGESTART(vaddr);
578 				load_addr_set = 1;
579 			}
580 
581 			/*
582 			 * Check to see if the section's size will overflow the
583 			 * allowed task size. Note that p_filesz must always be
584 			 * <= p_memsize so it's only necessary to check p_memsz.
585 			 */
586 			k = load_addr + eppnt->p_vaddr;
587 			if (BAD_ADDR(k) ||
588 			    eppnt->p_filesz > eppnt->p_memsz ||
589 			    eppnt->p_memsz > TASK_SIZE ||
590 			    TASK_SIZE - eppnt->p_memsz < k) {
591 				error = -ENOMEM;
592 				goto out;
593 			}
594 
595 			/*
596 			 * Find the end of the file mapping for this phdr, and
597 			 * keep track of the largest address we see for this.
598 			 */
599 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
600 			if (k > elf_bss)
601 				elf_bss = k;
602 
603 			/*
604 			 * Do the same thing for the memory mapping - between
605 			 * elf_bss and last_bss is the bss section.
606 			 */
607 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
608 			if (k > last_bss)
609 				last_bss = k;
610 		}
611 	}
612 
613 	if (last_bss > elf_bss) {
614 		/*
615 		 * Now fill out the bss section.  First pad the last page up
616 		 * to the page boundary, and then perform a mmap to make sure
617 		 * that there are zero-mapped pages up to and including the
618 		 * last bss page.
619 		 */
620 		if (padzero(elf_bss)) {
621 			error = -EFAULT;
622 			goto out;
623 		}
624 
625 		/* What we have mapped so far */
626 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
627 
628 		/* Map the last of the bss segment */
629 		error = vm_brk(elf_bss, last_bss - elf_bss);
630 		if (BAD_ADDR(error))
631 			goto out;
632 	}
633 
634 	error = load_addr;
635 out:
636 	return error;
637 }
638 
639 /*
640  * These are the functions used to load ELF style executables and shared
641  * libraries.  There is no binary dependent code anywhere else.
642  */
643 
644 #ifndef STACK_RND_MASK
645 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
646 #endif
647 
648 static unsigned long randomize_stack_top(unsigned long stack_top)
649 {
650 	unsigned long random_variable = 0;
651 
652 	if ((current->flags & PF_RANDOMIZE) &&
653 		!(current->personality & ADDR_NO_RANDOMIZE)) {
654 		random_variable = (unsigned long) get_random_int();
655 		random_variable &= STACK_RND_MASK;
656 		random_variable <<= PAGE_SHIFT;
657 	}
658 #ifdef CONFIG_STACK_GROWSUP
659 	return PAGE_ALIGN(stack_top) + random_variable;
660 #else
661 	return PAGE_ALIGN(stack_top) - random_variable;
662 #endif
663 }
664 
665 static int load_elf_binary(struct linux_binprm *bprm)
666 {
667 	struct file *interpreter = NULL; /* to shut gcc up */
668  	unsigned long load_addr = 0, load_bias = 0;
669 	int load_addr_set = 0;
670 	char * elf_interpreter = NULL;
671 	unsigned long error;
672 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
673 	unsigned long elf_bss, elf_brk;
674 	int retval, i;
675 	unsigned long elf_entry;
676 	unsigned long interp_load_addr = 0;
677 	unsigned long start_code, end_code, start_data, end_data;
678 	unsigned long reloc_func_desc __maybe_unused = 0;
679 	int executable_stack = EXSTACK_DEFAULT;
680 	struct pt_regs *regs = current_pt_regs();
681 	struct {
682 		struct elfhdr elf_ex;
683 		struct elfhdr interp_elf_ex;
684 	} *loc;
685 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
686 
687 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
688 	if (!loc) {
689 		retval = -ENOMEM;
690 		goto out_ret;
691 	}
692 
693 	/* Get the exec-header */
694 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
695 
696 	retval = -ENOEXEC;
697 	/* First of all, some simple consistency checks */
698 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
699 		goto out;
700 
701 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
702 		goto out;
703 	if (!elf_check_arch(&loc->elf_ex))
704 		goto out;
705 	if (!bprm->file->f_op->mmap)
706 		goto out;
707 
708 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
709 	if (!elf_phdata)
710 		goto out;
711 
712 	elf_ppnt = elf_phdata;
713 	elf_bss = 0;
714 	elf_brk = 0;
715 
716 	start_code = ~0UL;
717 	end_code = 0;
718 	start_data = 0;
719 	end_data = 0;
720 
721 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
722 		if (elf_ppnt->p_type == PT_INTERP) {
723 			/* This is the program interpreter used for
724 			 * shared libraries - for now assume that this
725 			 * is an a.out format binary
726 			 */
727 			retval = -ENOEXEC;
728 			if (elf_ppnt->p_filesz > PATH_MAX ||
729 			    elf_ppnt->p_filesz < 2)
730 				goto out_free_ph;
731 
732 			retval = -ENOMEM;
733 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
734 						  GFP_KERNEL);
735 			if (!elf_interpreter)
736 				goto out_free_ph;
737 
738 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
739 					     elf_interpreter,
740 					     elf_ppnt->p_filesz);
741 			if (retval != elf_ppnt->p_filesz) {
742 				if (retval >= 0)
743 					retval = -EIO;
744 				goto out_free_interp;
745 			}
746 			/* make sure path is NULL terminated */
747 			retval = -ENOEXEC;
748 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
749 				goto out_free_interp;
750 
751 			interpreter = open_exec(elf_interpreter);
752 			retval = PTR_ERR(interpreter);
753 			if (IS_ERR(interpreter))
754 				goto out_free_interp;
755 
756 			/*
757 			 * If the binary is not readable then enforce
758 			 * mm->dumpable = 0 regardless of the interpreter's
759 			 * permissions.
760 			 */
761 			would_dump(bprm, interpreter);
762 
763 			/* Get the exec headers */
764 			retval = kernel_read(interpreter, 0,
765 					     (void *)&loc->interp_elf_ex,
766 					     sizeof(loc->interp_elf_ex));
767 			if (retval != sizeof(loc->interp_elf_ex)) {
768 				if (retval >= 0)
769 					retval = -EIO;
770 				goto out_free_dentry;
771 			}
772 
773 			break;
774 		}
775 		elf_ppnt++;
776 	}
777 
778 	elf_ppnt = elf_phdata;
779 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
780 		switch (elf_ppnt->p_type) {
781 		case PT_GNU_STACK:
782 			if (elf_ppnt->p_flags & PF_X)
783 				executable_stack = EXSTACK_ENABLE_X;
784 			else
785 				executable_stack = EXSTACK_DISABLE_X;
786 			break;
787 
788 		case PT_LOPROC ... PT_HIPROC:
789 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
790 						  bprm->file, false,
791 						  &arch_state);
792 			if (retval)
793 				goto out_free_dentry;
794 			break;
795 		}
796 
797 	/* Some simple consistency checks for the interpreter */
798 	if (elf_interpreter) {
799 		retval = -ELIBBAD;
800 		/* Not an ELF interpreter */
801 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
802 			goto out_free_dentry;
803 		/* Verify the interpreter has a valid arch */
804 		if (!elf_check_arch(&loc->interp_elf_ex))
805 			goto out_free_dentry;
806 
807 		/* Load the interpreter program headers */
808 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
809 						   interpreter);
810 		if (!interp_elf_phdata)
811 			goto out_free_dentry;
812 
813 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
814 		elf_ppnt = interp_elf_phdata;
815 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
816 			switch (elf_ppnt->p_type) {
817 			case PT_LOPROC ... PT_HIPROC:
818 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
819 							  elf_ppnt, interpreter,
820 							  true, &arch_state);
821 				if (retval)
822 					goto out_free_dentry;
823 				break;
824 			}
825 	}
826 
827 	/*
828 	 * Allow arch code to reject the ELF at this point, whilst it's
829 	 * still possible to return an error to the code that invoked
830 	 * the exec syscall.
831 	 */
832 	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
833 	if (retval)
834 		goto out_free_dentry;
835 
836 	/* Flush all traces of the currently running executable */
837 	retval = flush_old_exec(bprm);
838 	if (retval)
839 		goto out_free_dentry;
840 
841 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
842 	   may depend on the personality.  */
843 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
844 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
845 		current->personality |= READ_IMPLIES_EXEC;
846 
847 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
848 		current->flags |= PF_RANDOMIZE;
849 
850 	setup_new_exec(bprm);
851 
852 	/* Do this so that we can load the interpreter, if need be.  We will
853 	   change some of these later */
854 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
855 				 executable_stack);
856 	if (retval < 0)
857 		goto out_free_dentry;
858 
859 	current->mm->start_stack = bprm->p;
860 
861 	/* Now we do a little grungy work by mmapping the ELF image into
862 	   the correct location in memory. */
863 	for(i = 0, elf_ppnt = elf_phdata;
864 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
865 		int elf_prot = 0, elf_flags;
866 		unsigned long k, vaddr;
867 		unsigned long total_size = 0;
868 
869 		if (elf_ppnt->p_type != PT_LOAD)
870 			continue;
871 
872 		if (unlikely (elf_brk > elf_bss)) {
873 			unsigned long nbyte;
874 
875 			/* There was a PT_LOAD segment with p_memsz > p_filesz
876 			   before this one. Map anonymous pages, if needed,
877 			   and clear the area.  */
878 			retval = set_brk(elf_bss + load_bias,
879 					 elf_brk + load_bias);
880 			if (retval)
881 				goto out_free_dentry;
882 			nbyte = ELF_PAGEOFFSET(elf_bss);
883 			if (nbyte) {
884 				nbyte = ELF_MIN_ALIGN - nbyte;
885 				if (nbyte > elf_brk - elf_bss)
886 					nbyte = elf_brk - elf_bss;
887 				if (clear_user((void __user *)elf_bss +
888 							load_bias, nbyte)) {
889 					/*
890 					 * This bss-zeroing can fail if the ELF
891 					 * file specifies odd protections. So
892 					 * we don't check the return value
893 					 */
894 				}
895 			}
896 		}
897 
898 		if (elf_ppnt->p_flags & PF_R)
899 			elf_prot |= PROT_READ;
900 		if (elf_ppnt->p_flags & PF_W)
901 			elf_prot |= PROT_WRITE;
902 		if (elf_ppnt->p_flags & PF_X)
903 			elf_prot |= PROT_EXEC;
904 
905 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906 
907 		vaddr = elf_ppnt->p_vaddr;
908 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
909 			elf_flags |= MAP_FIXED;
910 		} else if (loc->elf_ex.e_type == ET_DYN) {
911 			/* Try and get dynamic programs out of the way of the
912 			 * default mmap base, as well as whatever program they
913 			 * might try to exec.  This is because the brk will
914 			 * follow the loader, and is not movable.  */
915 			load_bias = ELF_ET_DYN_BASE - vaddr;
916 			if (current->flags & PF_RANDOMIZE)
917 				load_bias += arch_mmap_rnd();
918 			load_bias = ELF_PAGESTART(load_bias);
919 			total_size = total_mapping_size(elf_phdata,
920 							loc->elf_ex.e_phnum);
921 			if (!total_size) {
922 				retval = -EINVAL;
923 				goto out_free_dentry;
924 			}
925 		}
926 
927 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928 				elf_prot, elf_flags, total_size);
929 		if (BAD_ADDR(error)) {
930 			retval = IS_ERR((void *)error) ?
931 				PTR_ERR((void*)error) : -EINVAL;
932 			goto out_free_dentry;
933 		}
934 
935 		if (!load_addr_set) {
936 			load_addr_set = 1;
937 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
938 			if (loc->elf_ex.e_type == ET_DYN) {
939 				load_bias += error -
940 				             ELF_PAGESTART(load_bias + vaddr);
941 				load_addr += load_bias;
942 				reloc_func_desc = load_bias;
943 			}
944 		}
945 		k = elf_ppnt->p_vaddr;
946 		if (k < start_code)
947 			start_code = k;
948 		if (start_data < k)
949 			start_data = k;
950 
951 		/*
952 		 * Check to see if the section's size will overflow the
953 		 * allowed task size. Note that p_filesz must always be
954 		 * <= p_memsz so it is only necessary to check p_memsz.
955 		 */
956 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
957 		    elf_ppnt->p_memsz > TASK_SIZE ||
958 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
959 			/* set_brk can never work. Avoid overflows. */
960 			retval = -EINVAL;
961 			goto out_free_dentry;
962 		}
963 
964 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
965 
966 		if (k > elf_bss)
967 			elf_bss = k;
968 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
969 			end_code = k;
970 		if (end_data < k)
971 			end_data = k;
972 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
973 		if (k > elf_brk)
974 			elf_brk = k;
975 	}
976 
977 	loc->elf_ex.e_entry += load_bias;
978 	elf_bss += load_bias;
979 	elf_brk += load_bias;
980 	start_code += load_bias;
981 	end_code += load_bias;
982 	start_data += load_bias;
983 	end_data += load_bias;
984 
985 	/* Calling set_brk effectively mmaps the pages that we need
986 	 * for the bss and break sections.  We must do this before
987 	 * mapping in the interpreter, to make sure it doesn't wind
988 	 * up getting placed where the bss needs to go.
989 	 */
990 	retval = set_brk(elf_bss, elf_brk);
991 	if (retval)
992 		goto out_free_dentry;
993 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
994 		retval = -EFAULT; /* Nobody gets to see this, but.. */
995 		goto out_free_dentry;
996 	}
997 
998 	if (elf_interpreter) {
999 		unsigned long interp_map_addr = 0;
1000 
1001 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1002 					    interpreter,
1003 					    &interp_map_addr,
1004 					    load_bias, interp_elf_phdata);
1005 		if (!IS_ERR((void *)elf_entry)) {
1006 			/*
1007 			 * load_elf_interp() returns relocation
1008 			 * adjustment
1009 			 */
1010 			interp_load_addr = elf_entry;
1011 			elf_entry += loc->interp_elf_ex.e_entry;
1012 		}
1013 		if (BAD_ADDR(elf_entry)) {
1014 			retval = IS_ERR((void *)elf_entry) ?
1015 					(int)elf_entry : -EINVAL;
1016 			goto out_free_dentry;
1017 		}
1018 		reloc_func_desc = interp_load_addr;
1019 
1020 		allow_write_access(interpreter);
1021 		fput(interpreter);
1022 		kfree(elf_interpreter);
1023 	} else {
1024 		elf_entry = loc->elf_ex.e_entry;
1025 		if (BAD_ADDR(elf_entry)) {
1026 			retval = -EINVAL;
1027 			goto out_free_dentry;
1028 		}
1029 	}
1030 
1031 	kfree(interp_elf_phdata);
1032 	kfree(elf_phdata);
1033 
1034 	set_binfmt(&elf_format);
1035 
1036 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1037 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1038 	if (retval < 0)
1039 		goto out;
1040 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1041 
1042 	install_exec_creds(bprm);
1043 	retval = create_elf_tables(bprm, &loc->elf_ex,
1044 			  load_addr, interp_load_addr);
1045 	if (retval < 0)
1046 		goto out;
1047 	/* N.B. passed_fileno might not be initialized? */
1048 	current->mm->end_code = end_code;
1049 	current->mm->start_code = start_code;
1050 	current->mm->start_data = start_data;
1051 	current->mm->end_data = end_data;
1052 	current->mm->start_stack = bprm->p;
1053 
1054 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1055 		current->mm->brk = current->mm->start_brk =
1056 			arch_randomize_brk(current->mm);
1057 #ifdef compat_brk_randomized
1058 		current->brk_randomized = 1;
1059 #endif
1060 	}
1061 
1062 	if (current->personality & MMAP_PAGE_ZERO) {
1063 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1064 		   and some applications "depend" upon this behavior.
1065 		   Since we do not have the power to recompile these, we
1066 		   emulate the SVr4 behavior. Sigh. */
1067 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1068 				MAP_FIXED | MAP_PRIVATE, 0);
1069 	}
1070 
1071 #ifdef ELF_PLAT_INIT
1072 	/*
1073 	 * The ABI may specify that certain registers be set up in special
1074 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1075 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1076 	 * that the e_entry field is the address of the function descriptor
1077 	 * for the startup routine, rather than the address of the startup
1078 	 * routine itself.  This macro performs whatever initialization to
1079 	 * the regs structure is required as well as any relocations to the
1080 	 * function descriptor entries when executing dynamically links apps.
1081 	 */
1082 	ELF_PLAT_INIT(regs, reloc_func_desc);
1083 #endif
1084 
1085 	start_thread(regs, elf_entry, bprm->p);
1086 	retval = 0;
1087 out:
1088 	kfree(loc);
1089 out_ret:
1090 	return retval;
1091 
1092 	/* error cleanup */
1093 out_free_dentry:
1094 	kfree(interp_elf_phdata);
1095 	allow_write_access(interpreter);
1096 	if (interpreter)
1097 		fput(interpreter);
1098 out_free_interp:
1099 	kfree(elf_interpreter);
1100 out_free_ph:
1101 	kfree(elf_phdata);
1102 	goto out;
1103 }
1104 
1105 #ifdef CONFIG_USELIB
1106 /* This is really simpleminded and specialized - we are loading an
1107    a.out library that is given an ELF header. */
1108 static int load_elf_library(struct file *file)
1109 {
1110 	struct elf_phdr *elf_phdata;
1111 	struct elf_phdr *eppnt;
1112 	unsigned long elf_bss, bss, len;
1113 	int retval, error, i, j;
1114 	struct elfhdr elf_ex;
1115 
1116 	error = -ENOEXEC;
1117 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1118 	if (retval != sizeof(elf_ex))
1119 		goto out;
1120 
1121 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1122 		goto out;
1123 
1124 	/* First of all, some simple consistency checks */
1125 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1126 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1127 		goto out;
1128 
1129 	/* Now read in all of the header information */
1130 
1131 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1132 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1133 
1134 	error = -ENOMEM;
1135 	elf_phdata = kmalloc(j, GFP_KERNEL);
1136 	if (!elf_phdata)
1137 		goto out;
1138 
1139 	eppnt = elf_phdata;
1140 	error = -ENOEXEC;
1141 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1142 	if (retval != j)
1143 		goto out_free_ph;
1144 
1145 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1146 		if ((eppnt + i)->p_type == PT_LOAD)
1147 			j++;
1148 	if (j != 1)
1149 		goto out_free_ph;
1150 
1151 	while (eppnt->p_type != PT_LOAD)
1152 		eppnt++;
1153 
1154 	/* Now use mmap to map the library into memory. */
1155 	error = vm_mmap(file,
1156 			ELF_PAGESTART(eppnt->p_vaddr),
1157 			(eppnt->p_filesz +
1158 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1159 			PROT_READ | PROT_WRITE | PROT_EXEC,
1160 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1161 			(eppnt->p_offset -
1162 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1163 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1164 		goto out_free_ph;
1165 
1166 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1167 	if (padzero(elf_bss)) {
1168 		error = -EFAULT;
1169 		goto out_free_ph;
1170 	}
1171 
1172 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1173 			    ELF_MIN_ALIGN - 1);
1174 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1175 	if (bss > len)
1176 		vm_brk(len, bss - len);
1177 	error = 0;
1178 
1179 out_free_ph:
1180 	kfree(elf_phdata);
1181 out:
1182 	return error;
1183 }
1184 #endif /* #ifdef CONFIG_USELIB */
1185 
1186 #ifdef CONFIG_ELF_CORE
1187 /*
1188  * ELF core dumper
1189  *
1190  * Modelled on fs/exec.c:aout_core_dump()
1191  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1192  */
1193 
1194 /*
1195  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1196  * that are useful for post-mortem analysis are included in every core dump.
1197  * In that way we ensure that the core dump is fully interpretable later
1198  * without matching up the same kernel and hardware config to see what PC values
1199  * meant. These special mappings include - vDSO, vsyscall, and other
1200  * architecture specific mappings
1201  */
1202 static bool always_dump_vma(struct vm_area_struct *vma)
1203 {
1204 	/* Any vsyscall mappings? */
1205 	if (vma == get_gate_vma(vma->vm_mm))
1206 		return true;
1207 
1208 	/*
1209 	 * Assume that all vmas with a .name op should always be dumped.
1210 	 * If this changes, a new vm_ops field can easily be added.
1211 	 */
1212 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1213 		return true;
1214 
1215 	/*
1216 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1217 	 * such as vDSO sections.
1218 	 */
1219 	if (arch_vma_name(vma))
1220 		return true;
1221 
1222 	return false;
1223 }
1224 
1225 /*
1226  * Decide what to dump of a segment, part, all or none.
1227  */
1228 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1229 				   unsigned long mm_flags)
1230 {
1231 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1232 
1233 	/* always dump the vdso and vsyscall sections */
1234 	if (always_dump_vma(vma))
1235 		goto whole;
1236 
1237 	if (vma->vm_flags & VM_DONTDUMP)
1238 		return 0;
1239 
1240 	/* support for DAX */
1241 	if (vma_is_dax(vma)) {
1242 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1243 			goto whole;
1244 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1245 			goto whole;
1246 		return 0;
1247 	}
1248 
1249 	/* Hugetlb memory check */
1250 	if (vma->vm_flags & VM_HUGETLB) {
1251 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1252 			goto whole;
1253 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1254 			goto whole;
1255 		return 0;
1256 	}
1257 
1258 	/* Do not dump I/O mapped devices or special mappings */
1259 	if (vma->vm_flags & VM_IO)
1260 		return 0;
1261 
1262 	/* By default, dump shared memory if mapped from an anonymous file. */
1263 	if (vma->vm_flags & VM_SHARED) {
1264 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1265 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1266 			goto whole;
1267 		return 0;
1268 	}
1269 
1270 	/* Dump segments that have been written to.  */
1271 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1272 		goto whole;
1273 	if (vma->vm_file == NULL)
1274 		return 0;
1275 
1276 	if (FILTER(MAPPED_PRIVATE))
1277 		goto whole;
1278 
1279 	/*
1280 	 * If this looks like the beginning of a DSO or executable mapping,
1281 	 * check for an ELF header.  If we find one, dump the first page to
1282 	 * aid in determining what was mapped here.
1283 	 */
1284 	if (FILTER(ELF_HEADERS) &&
1285 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1286 		u32 __user *header = (u32 __user *) vma->vm_start;
1287 		u32 word;
1288 		mm_segment_t fs = get_fs();
1289 		/*
1290 		 * Doing it this way gets the constant folded by GCC.
1291 		 */
1292 		union {
1293 			u32 cmp;
1294 			char elfmag[SELFMAG];
1295 		} magic;
1296 		BUILD_BUG_ON(SELFMAG != sizeof word);
1297 		magic.elfmag[EI_MAG0] = ELFMAG0;
1298 		magic.elfmag[EI_MAG1] = ELFMAG1;
1299 		magic.elfmag[EI_MAG2] = ELFMAG2;
1300 		magic.elfmag[EI_MAG3] = ELFMAG3;
1301 		/*
1302 		 * Switch to the user "segment" for get_user(),
1303 		 * then put back what elf_core_dump() had in place.
1304 		 */
1305 		set_fs(USER_DS);
1306 		if (unlikely(get_user(word, header)))
1307 			word = 0;
1308 		set_fs(fs);
1309 		if (word == magic.cmp)
1310 			return PAGE_SIZE;
1311 	}
1312 
1313 #undef	FILTER
1314 
1315 	return 0;
1316 
1317 whole:
1318 	return vma->vm_end - vma->vm_start;
1319 }
1320 
1321 /* An ELF note in memory */
1322 struct memelfnote
1323 {
1324 	const char *name;
1325 	int type;
1326 	unsigned int datasz;
1327 	void *data;
1328 };
1329 
1330 static int notesize(struct memelfnote *en)
1331 {
1332 	int sz;
1333 
1334 	sz = sizeof(struct elf_note);
1335 	sz += roundup(strlen(en->name) + 1, 4);
1336 	sz += roundup(en->datasz, 4);
1337 
1338 	return sz;
1339 }
1340 
1341 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1342 {
1343 	struct elf_note en;
1344 	en.n_namesz = strlen(men->name) + 1;
1345 	en.n_descsz = men->datasz;
1346 	en.n_type = men->type;
1347 
1348 	return dump_emit(cprm, &en, sizeof(en)) &&
1349 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1350 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1351 }
1352 
1353 static void fill_elf_header(struct elfhdr *elf, int segs,
1354 			    u16 machine, u32 flags)
1355 {
1356 	memset(elf, 0, sizeof(*elf));
1357 
1358 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1359 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1360 	elf->e_ident[EI_DATA] = ELF_DATA;
1361 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1362 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1363 
1364 	elf->e_type = ET_CORE;
1365 	elf->e_machine = machine;
1366 	elf->e_version = EV_CURRENT;
1367 	elf->e_phoff = sizeof(struct elfhdr);
1368 	elf->e_flags = flags;
1369 	elf->e_ehsize = sizeof(struct elfhdr);
1370 	elf->e_phentsize = sizeof(struct elf_phdr);
1371 	elf->e_phnum = segs;
1372 
1373 	return;
1374 }
1375 
1376 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1377 {
1378 	phdr->p_type = PT_NOTE;
1379 	phdr->p_offset = offset;
1380 	phdr->p_vaddr = 0;
1381 	phdr->p_paddr = 0;
1382 	phdr->p_filesz = sz;
1383 	phdr->p_memsz = 0;
1384 	phdr->p_flags = 0;
1385 	phdr->p_align = 0;
1386 	return;
1387 }
1388 
1389 static void fill_note(struct memelfnote *note, const char *name, int type,
1390 		unsigned int sz, void *data)
1391 {
1392 	note->name = name;
1393 	note->type = type;
1394 	note->datasz = sz;
1395 	note->data = data;
1396 	return;
1397 }
1398 
1399 /*
1400  * fill up all the fields in prstatus from the given task struct, except
1401  * registers which need to be filled up separately.
1402  */
1403 static void fill_prstatus(struct elf_prstatus *prstatus,
1404 		struct task_struct *p, long signr)
1405 {
1406 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1407 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1408 	prstatus->pr_sighold = p->blocked.sig[0];
1409 	rcu_read_lock();
1410 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1411 	rcu_read_unlock();
1412 	prstatus->pr_pid = task_pid_vnr(p);
1413 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1414 	prstatus->pr_sid = task_session_vnr(p);
1415 	if (thread_group_leader(p)) {
1416 		struct task_cputime cputime;
1417 
1418 		/*
1419 		 * This is the record for the group leader.  It shows the
1420 		 * group-wide total, not its individual thread total.
1421 		 */
1422 		thread_group_cputime(p, &cputime);
1423 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1424 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1425 	} else {
1426 		cputime_t utime, stime;
1427 
1428 		task_cputime(p, &utime, &stime);
1429 		cputime_to_timeval(utime, &prstatus->pr_utime);
1430 		cputime_to_timeval(stime, &prstatus->pr_stime);
1431 	}
1432 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1433 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1434 }
1435 
1436 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1437 		       struct mm_struct *mm)
1438 {
1439 	const struct cred *cred;
1440 	unsigned int i, len;
1441 
1442 	/* first copy the parameters from user space */
1443 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1444 
1445 	len = mm->arg_end - mm->arg_start;
1446 	if (len >= ELF_PRARGSZ)
1447 		len = ELF_PRARGSZ-1;
1448 	if (copy_from_user(&psinfo->pr_psargs,
1449 		           (const char __user *)mm->arg_start, len))
1450 		return -EFAULT;
1451 	for(i = 0; i < len; i++)
1452 		if (psinfo->pr_psargs[i] == 0)
1453 			psinfo->pr_psargs[i] = ' ';
1454 	psinfo->pr_psargs[len] = 0;
1455 
1456 	rcu_read_lock();
1457 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1458 	rcu_read_unlock();
1459 	psinfo->pr_pid = task_pid_vnr(p);
1460 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1461 	psinfo->pr_sid = task_session_vnr(p);
1462 
1463 	i = p->state ? ffz(~p->state) + 1 : 0;
1464 	psinfo->pr_state = i;
1465 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1466 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1467 	psinfo->pr_nice = task_nice(p);
1468 	psinfo->pr_flag = p->flags;
1469 	rcu_read_lock();
1470 	cred = __task_cred(p);
1471 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1472 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1473 	rcu_read_unlock();
1474 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1475 
1476 	return 0;
1477 }
1478 
1479 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1480 {
1481 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1482 	int i = 0;
1483 	do
1484 		i += 2;
1485 	while (auxv[i - 2] != AT_NULL);
1486 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1487 }
1488 
1489 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1490 		const siginfo_t *siginfo)
1491 {
1492 	mm_segment_t old_fs = get_fs();
1493 	set_fs(KERNEL_DS);
1494 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1495 	set_fs(old_fs);
1496 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1497 }
1498 
1499 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1500 /*
1501  * Format of NT_FILE note:
1502  *
1503  * long count     -- how many files are mapped
1504  * long page_size -- units for file_ofs
1505  * array of [COUNT] elements of
1506  *   long start
1507  *   long end
1508  *   long file_ofs
1509  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1510  */
1511 static int fill_files_note(struct memelfnote *note)
1512 {
1513 	struct vm_area_struct *vma;
1514 	unsigned count, size, names_ofs, remaining, n;
1515 	user_long_t *data;
1516 	user_long_t *start_end_ofs;
1517 	char *name_base, *name_curpos;
1518 
1519 	/* *Estimated* file count and total data size needed */
1520 	count = current->mm->map_count;
1521 	size = count * 64;
1522 
1523 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1524  alloc:
1525 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1526 		return -EINVAL;
1527 	size = round_up(size, PAGE_SIZE);
1528 	data = vmalloc(size);
1529 	if (!data)
1530 		return -ENOMEM;
1531 
1532 	start_end_ofs = data + 2;
1533 	name_base = name_curpos = ((char *)data) + names_ofs;
1534 	remaining = size - names_ofs;
1535 	count = 0;
1536 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1537 		struct file *file;
1538 		const char *filename;
1539 
1540 		file = vma->vm_file;
1541 		if (!file)
1542 			continue;
1543 		filename = file_path(file, name_curpos, remaining);
1544 		if (IS_ERR(filename)) {
1545 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1546 				vfree(data);
1547 				size = size * 5 / 4;
1548 				goto alloc;
1549 			}
1550 			continue;
1551 		}
1552 
1553 		/* file_path() fills at the end, move name down */
1554 		/* n = strlen(filename) + 1: */
1555 		n = (name_curpos + remaining) - filename;
1556 		remaining = filename - name_curpos;
1557 		memmove(name_curpos, filename, n);
1558 		name_curpos += n;
1559 
1560 		*start_end_ofs++ = vma->vm_start;
1561 		*start_end_ofs++ = vma->vm_end;
1562 		*start_end_ofs++ = vma->vm_pgoff;
1563 		count++;
1564 	}
1565 
1566 	/* Now we know exact count of files, can store it */
1567 	data[0] = count;
1568 	data[1] = PAGE_SIZE;
1569 	/*
1570 	 * Count usually is less than current->mm->map_count,
1571 	 * we need to move filenames down.
1572 	 */
1573 	n = current->mm->map_count - count;
1574 	if (n != 0) {
1575 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1576 		memmove(name_base - shift_bytes, name_base,
1577 			name_curpos - name_base);
1578 		name_curpos -= shift_bytes;
1579 	}
1580 
1581 	size = name_curpos - (char *)data;
1582 	fill_note(note, "CORE", NT_FILE, size, data);
1583 	return 0;
1584 }
1585 
1586 #ifdef CORE_DUMP_USE_REGSET
1587 #include <linux/regset.h>
1588 
1589 struct elf_thread_core_info {
1590 	struct elf_thread_core_info *next;
1591 	struct task_struct *task;
1592 	struct elf_prstatus prstatus;
1593 	struct memelfnote notes[0];
1594 };
1595 
1596 struct elf_note_info {
1597 	struct elf_thread_core_info *thread;
1598 	struct memelfnote psinfo;
1599 	struct memelfnote signote;
1600 	struct memelfnote auxv;
1601 	struct memelfnote files;
1602 	user_siginfo_t csigdata;
1603 	size_t size;
1604 	int thread_notes;
1605 };
1606 
1607 /*
1608  * When a regset has a writeback hook, we call it on each thread before
1609  * dumping user memory.  On register window machines, this makes sure the
1610  * user memory backing the register data is up to date before we read it.
1611  */
1612 static void do_thread_regset_writeback(struct task_struct *task,
1613 				       const struct user_regset *regset)
1614 {
1615 	if (regset->writeback)
1616 		regset->writeback(task, regset, 1);
1617 }
1618 
1619 #ifndef PR_REG_SIZE
1620 #define PR_REG_SIZE(S) sizeof(S)
1621 #endif
1622 
1623 #ifndef PRSTATUS_SIZE
1624 #define PRSTATUS_SIZE(S) sizeof(S)
1625 #endif
1626 
1627 #ifndef PR_REG_PTR
1628 #define PR_REG_PTR(S) (&((S)->pr_reg))
1629 #endif
1630 
1631 #ifndef SET_PR_FPVALID
1632 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1633 #endif
1634 
1635 static int fill_thread_core_info(struct elf_thread_core_info *t,
1636 				 const struct user_regset_view *view,
1637 				 long signr, size_t *total)
1638 {
1639 	unsigned int i;
1640 
1641 	/*
1642 	 * NT_PRSTATUS is the one special case, because the regset data
1643 	 * goes into the pr_reg field inside the note contents, rather
1644 	 * than being the whole note contents.  We fill the reset in here.
1645 	 * We assume that regset 0 is NT_PRSTATUS.
1646 	 */
1647 	fill_prstatus(&t->prstatus, t->task, signr);
1648 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1649 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1650 				    PR_REG_PTR(&t->prstatus), NULL);
1651 
1652 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1653 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1654 	*total += notesize(&t->notes[0]);
1655 
1656 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1657 
1658 	/*
1659 	 * Each other regset might generate a note too.  For each regset
1660 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1661 	 * all zero and we'll know to skip writing it later.
1662 	 */
1663 	for (i = 1; i < view->n; ++i) {
1664 		const struct user_regset *regset = &view->regsets[i];
1665 		do_thread_regset_writeback(t->task, regset);
1666 		if (regset->core_note_type && regset->get &&
1667 		    (!regset->active || regset->active(t->task, regset))) {
1668 			int ret;
1669 			size_t size = regset->n * regset->size;
1670 			void *data = kmalloc(size, GFP_KERNEL);
1671 			if (unlikely(!data))
1672 				return 0;
1673 			ret = regset->get(t->task, regset,
1674 					  0, size, data, NULL);
1675 			if (unlikely(ret))
1676 				kfree(data);
1677 			else {
1678 				if (regset->core_note_type != NT_PRFPREG)
1679 					fill_note(&t->notes[i], "LINUX",
1680 						  regset->core_note_type,
1681 						  size, data);
1682 				else {
1683 					SET_PR_FPVALID(&t->prstatus, 1);
1684 					fill_note(&t->notes[i], "CORE",
1685 						  NT_PRFPREG, size, data);
1686 				}
1687 				*total += notesize(&t->notes[i]);
1688 			}
1689 		}
1690 	}
1691 
1692 	return 1;
1693 }
1694 
1695 static int fill_note_info(struct elfhdr *elf, int phdrs,
1696 			  struct elf_note_info *info,
1697 			  const siginfo_t *siginfo, struct pt_regs *regs)
1698 {
1699 	struct task_struct *dump_task = current;
1700 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1701 	struct elf_thread_core_info *t;
1702 	struct elf_prpsinfo *psinfo;
1703 	struct core_thread *ct;
1704 	unsigned int i;
1705 
1706 	info->size = 0;
1707 	info->thread = NULL;
1708 
1709 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1710 	if (psinfo == NULL) {
1711 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1712 		return 0;
1713 	}
1714 
1715 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1716 
1717 	/*
1718 	 * Figure out how many notes we're going to need for each thread.
1719 	 */
1720 	info->thread_notes = 0;
1721 	for (i = 0; i < view->n; ++i)
1722 		if (view->regsets[i].core_note_type != 0)
1723 			++info->thread_notes;
1724 
1725 	/*
1726 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1727 	 * since it is our one special case.
1728 	 */
1729 	if (unlikely(info->thread_notes == 0) ||
1730 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1731 		WARN_ON(1);
1732 		return 0;
1733 	}
1734 
1735 	/*
1736 	 * Initialize the ELF file header.
1737 	 */
1738 	fill_elf_header(elf, phdrs,
1739 			view->e_machine, view->e_flags);
1740 
1741 	/*
1742 	 * Allocate a structure for each thread.
1743 	 */
1744 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1745 		t = kzalloc(offsetof(struct elf_thread_core_info,
1746 				     notes[info->thread_notes]),
1747 			    GFP_KERNEL);
1748 		if (unlikely(!t))
1749 			return 0;
1750 
1751 		t->task = ct->task;
1752 		if (ct->task == dump_task || !info->thread) {
1753 			t->next = info->thread;
1754 			info->thread = t;
1755 		} else {
1756 			/*
1757 			 * Make sure to keep the original task at
1758 			 * the head of the list.
1759 			 */
1760 			t->next = info->thread->next;
1761 			info->thread->next = t;
1762 		}
1763 	}
1764 
1765 	/*
1766 	 * Now fill in each thread's information.
1767 	 */
1768 	for (t = info->thread; t != NULL; t = t->next)
1769 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1770 			return 0;
1771 
1772 	/*
1773 	 * Fill in the two process-wide notes.
1774 	 */
1775 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1776 	info->size += notesize(&info->psinfo);
1777 
1778 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1779 	info->size += notesize(&info->signote);
1780 
1781 	fill_auxv_note(&info->auxv, current->mm);
1782 	info->size += notesize(&info->auxv);
1783 
1784 	if (fill_files_note(&info->files) == 0)
1785 		info->size += notesize(&info->files);
1786 
1787 	return 1;
1788 }
1789 
1790 static size_t get_note_info_size(struct elf_note_info *info)
1791 {
1792 	return info->size;
1793 }
1794 
1795 /*
1796  * Write all the notes for each thread.  When writing the first thread, the
1797  * process-wide notes are interleaved after the first thread-specific note.
1798  */
1799 static int write_note_info(struct elf_note_info *info,
1800 			   struct coredump_params *cprm)
1801 {
1802 	bool first = true;
1803 	struct elf_thread_core_info *t = info->thread;
1804 
1805 	do {
1806 		int i;
1807 
1808 		if (!writenote(&t->notes[0], cprm))
1809 			return 0;
1810 
1811 		if (first && !writenote(&info->psinfo, cprm))
1812 			return 0;
1813 		if (first && !writenote(&info->signote, cprm))
1814 			return 0;
1815 		if (first && !writenote(&info->auxv, cprm))
1816 			return 0;
1817 		if (first && info->files.data &&
1818 				!writenote(&info->files, cprm))
1819 			return 0;
1820 
1821 		for (i = 1; i < info->thread_notes; ++i)
1822 			if (t->notes[i].data &&
1823 			    !writenote(&t->notes[i], cprm))
1824 				return 0;
1825 
1826 		first = false;
1827 		t = t->next;
1828 	} while (t);
1829 
1830 	return 1;
1831 }
1832 
1833 static void free_note_info(struct elf_note_info *info)
1834 {
1835 	struct elf_thread_core_info *threads = info->thread;
1836 	while (threads) {
1837 		unsigned int i;
1838 		struct elf_thread_core_info *t = threads;
1839 		threads = t->next;
1840 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1841 		for (i = 1; i < info->thread_notes; ++i)
1842 			kfree(t->notes[i].data);
1843 		kfree(t);
1844 	}
1845 	kfree(info->psinfo.data);
1846 	vfree(info->files.data);
1847 }
1848 
1849 #else
1850 
1851 /* Here is the structure in which status of each thread is captured. */
1852 struct elf_thread_status
1853 {
1854 	struct list_head list;
1855 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1856 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1857 	struct task_struct *thread;
1858 #ifdef ELF_CORE_COPY_XFPREGS
1859 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1860 #endif
1861 	struct memelfnote notes[3];
1862 	int num_notes;
1863 };
1864 
1865 /*
1866  * In order to add the specific thread information for the elf file format,
1867  * we need to keep a linked list of every threads pr_status and then create
1868  * a single section for them in the final core file.
1869  */
1870 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1871 {
1872 	int sz = 0;
1873 	struct task_struct *p = t->thread;
1874 	t->num_notes = 0;
1875 
1876 	fill_prstatus(&t->prstatus, p, signr);
1877 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1878 
1879 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1880 		  &(t->prstatus));
1881 	t->num_notes++;
1882 	sz += notesize(&t->notes[0]);
1883 
1884 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1885 								&t->fpu))) {
1886 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1887 			  &(t->fpu));
1888 		t->num_notes++;
1889 		sz += notesize(&t->notes[1]);
1890 	}
1891 
1892 #ifdef ELF_CORE_COPY_XFPREGS
1893 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1894 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1895 			  sizeof(t->xfpu), &t->xfpu);
1896 		t->num_notes++;
1897 		sz += notesize(&t->notes[2]);
1898 	}
1899 #endif
1900 	return sz;
1901 }
1902 
1903 struct elf_note_info {
1904 	struct memelfnote *notes;
1905 	struct memelfnote *notes_files;
1906 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1907 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1908 	struct list_head thread_list;
1909 	elf_fpregset_t *fpu;
1910 #ifdef ELF_CORE_COPY_XFPREGS
1911 	elf_fpxregset_t *xfpu;
1912 #endif
1913 	user_siginfo_t csigdata;
1914 	int thread_status_size;
1915 	int numnote;
1916 };
1917 
1918 static int elf_note_info_init(struct elf_note_info *info)
1919 {
1920 	memset(info, 0, sizeof(*info));
1921 	INIT_LIST_HEAD(&info->thread_list);
1922 
1923 	/* Allocate space for ELF notes */
1924 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1925 	if (!info->notes)
1926 		return 0;
1927 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1928 	if (!info->psinfo)
1929 		return 0;
1930 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1931 	if (!info->prstatus)
1932 		return 0;
1933 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1934 	if (!info->fpu)
1935 		return 0;
1936 #ifdef ELF_CORE_COPY_XFPREGS
1937 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1938 	if (!info->xfpu)
1939 		return 0;
1940 #endif
1941 	return 1;
1942 }
1943 
1944 static int fill_note_info(struct elfhdr *elf, int phdrs,
1945 			  struct elf_note_info *info,
1946 			  const siginfo_t *siginfo, struct pt_regs *regs)
1947 {
1948 	struct list_head *t;
1949 	struct core_thread *ct;
1950 	struct elf_thread_status *ets;
1951 
1952 	if (!elf_note_info_init(info))
1953 		return 0;
1954 
1955 	for (ct = current->mm->core_state->dumper.next;
1956 					ct; ct = ct->next) {
1957 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1958 		if (!ets)
1959 			return 0;
1960 
1961 		ets->thread = ct->task;
1962 		list_add(&ets->list, &info->thread_list);
1963 	}
1964 
1965 	list_for_each(t, &info->thread_list) {
1966 		int sz;
1967 
1968 		ets = list_entry(t, struct elf_thread_status, list);
1969 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1970 		info->thread_status_size += sz;
1971 	}
1972 	/* now collect the dump for the current */
1973 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1974 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1975 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1976 
1977 	/* Set up header */
1978 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1979 
1980 	/*
1981 	 * Set up the notes in similar form to SVR4 core dumps made
1982 	 * with info from their /proc.
1983 	 */
1984 
1985 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1986 		  sizeof(*info->prstatus), info->prstatus);
1987 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1988 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1989 		  sizeof(*info->psinfo), info->psinfo);
1990 
1991 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1992 	fill_auxv_note(info->notes + 3, current->mm);
1993 	info->numnote = 4;
1994 
1995 	if (fill_files_note(info->notes + info->numnote) == 0) {
1996 		info->notes_files = info->notes + info->numnote;
1997 		info->numnote++;
1998 	}
1999 
2000 	/* Try to dump the FPU. */
2001 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2002 							       info->fpu);
2003 	if (info->prstatus->pr_fpvalid)
2004 		fill_note(info->notes + info->numnote++,
2005 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2006 #ifdef ELF_CORE_COPY_XFPREGS
2007 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2008 		fill_note(info->notes + info->numnote++,
2009 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2010 			  sizeof(*info->xfpu), info->xfpu);
2011 #endif
2012 
2013 	return 1;
2014 }
2015 
2016 static size_t get_note_info_size(struct elf_note_info *info)
2017 {
2018 	int sz = 0;
2019 	int i;
2020 
2021 	for (i = 0; i < info->numnote; i++)
2022 		sz += notesize(info->notes + i);
2023 
2024 	sz += info->thread_status_size;
2025 
2026 	return sz;
2027 }
2028 
2029 static int write_note_info(struct elf_note_info *info,
2030 			   struct coredump_params *cprm)
2031 {
2032 	int i;
2033 	struct list_head *t;
2034 
2035 	for (i = 0; i < info->numnote; i++)
2036 		if (!writenote(info->notes + i, cprm))
2037 			return 0;
2038 
2039 	/* write out the thread status notes section */
2040 	list_for_each(t, &info->thread_list) {
2041 		struct elf_thread_status *tmp =
2042 				list_entry(t, struct elf_thread_status, list);
2043 
2044 		for (i = 0; i < tmp->num_notes; i++)
2045 			if (!writenote(&tmp->notes[i], cprm))
2046 				return 0;
2047 	}
2048 
2049 	return 1;
2050 }
2051 
2052 static void free_note_info(struct elf_note_info *info)
2053 {
2054 	while (!list_empty(&info->thread_list)) {
2055 		struct list_head *tmp = info->thread_list.next;
2056 		list_del(tmp);
2057 		kfree(list_entry(tmp, struct elf_thread_status, list));
2058 	}
2059 
2060 	/* Free data possibly allocated by fill_files_note(): */
2061 	if (info->notes_files)
2062 		vfree(info->notes_files->data);
2063 
2064 	kfree(info->prstatus);
2065 	kfree(info->psinfo);
2066 	kfree(info->notes);
2067 	kfree(info->fpu);
2068 #ifdef ELF_CORE_COPY_XFPREGS
2069 	kfree(info->xfpu);
2070 #endif
2071 }
2072 
2073 #endif
2074 
2075 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2076 					struct vm_area_struct *gate_vma)
2077 {
2078 	struct vm_area_struct *ret = tsk->mm->mmap;
2079 
2080 	if (ret)
2081 		return ret;
2082 	return gate_vma;
2083 }
2084 /*
2085  * Helper function for iterating across a vma list.  It ensures that the caller
2086  * will visit `gate_vma' prior to terminating the search.
2087  */
2088 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2089 					struct vm_area_struct *gate_vma)
2090 {
2091 	struct vm_area_struct *ret;
2092 
2093 	ret = this_vma->vm_next;
2094 	if (ret)
2095 		return ret;
2096 	if (this_vma == gate_vma)
2097 		return NULL;
2098 	return gate_vma;
2099 }
2100 
2101 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2102 			     elf_addr_t e_shoff, int segs)
2103 {
2104 	elf->e_shoff = e_shoff;
2105 	elf->e_shentsize = sizeof(*shdr4extnum);
2106 	elf->e_shnum = 1;
2107 	elf->e_shstrndx = SHN_UNDEF;
2108 
2109 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2110 
2111 	shdr4extnum->sh_type = SHT_NULL;
2112 	shdr4extnum->sh_size = elf->e_shnum;
2113 	shdr4extnum->sh_link = elf->e_shstrndx;
2114 	shdr4extnum->sh_info = segs;
2115 }
2116 
2117 /*
2118  * Actual dumper
2119  *
2120  * This is a two-pass process; first we find the offsets of the bits,
2121  * and then they are actually written out.  If we run out of core limit
2122  * we just truncate.
2123  */
2124 static int elf_core_dump(struct coredump_params *cprm)
2125 {
2126 	int has_dumped = 0;
2127 	mm_segment_t fs;
2128 	int segs, i;
2129 	size_t vma_data_size = 0;
2130 	struct vm_area_struct *vma, *gate_vma;
2131 	struct elfhdr *elf = NULL;
2132 	loff_t offset = 0, dataoff;
2133 	struct elf_note_info info = { };
2134 	struct elf_phdr *phdr4note = NULL;
2135 	struct elf_shdr *shdr4extnum = NULL;
2136 	Elf_Half e_phnum;
2137 	elf_addr_t e_shoff;
2138 	elf_addr_t *vma_filesz = NULL;
2139 
2140 	/*
2141 	 * We no longer stop all VM operations.
2142 	 *
2143 	 * This is because those proceses that could possibly change map_count
2144 	 * or the mmap / vma pages are now blocked in do_exit on current
2145 	 * finishing this core dump.
2146 	 *
2147 	 * Only ptrace can touch these memory addresses, but it doesn't change
2148 	 * the map_count or the pages allocated. So no possibility of crashing
2149 	 * exists while dumping the mm->vm_next areas to the core file.
2150 	 */
2151 
2152 	/* alloc memory for large data structures: too large to be on stack */
2153 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2154 	if (!elf)
2155 		goto out;
2156 	/*
2157 	 * The number of segs are recored into ELF header as 16bit value.
2158 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2159 	 */
2160 	segs = current->mm->map_count;
2161 	segs += elf_core_extra_phdrs();
2162 
2163 	gate_vma = get_gate_vma(current->mm);
2164 	if (gate_vma != NULL)
2165 		segs++;
2166 
2167 	/* for notes section */
2168 	segs++;
2169 
2170 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2171 	 * this, kernel supports extended numbering. Have a look at
2172 	 * include/linux/elf.h for further information. */
2173 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2174 
2175 	/*
2176 	 * Collect all the non-memory information about the process for the
2177 	 * notes.  This also sets up the file header.
2178 	 */
2179 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2180 		goto cleanup;
2181 
2182 	has_dumped = 1;
2183 
2184 	fs = get_fs();
2185 	set_fs(KERNEL_DS);
2186 
2187 	offset += sizeof(*elf);				/* Elf header */
2188 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2189 
2190 	/* Write notes phdr entry */
2191 	{
2192 		size_t sz = get_note_info_size(&info);
2193 
2194 		sz += elf_coredump_extra_notes_size();
2195 
2196 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2197 		if (!phdr4note)
2198 			goto end_coredump;
2199 
2200 		fill_elf_note_phdr(phdr4note, sz, offset);
2201 		offset += sz;
2202 	}
2203 
2204 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2205 
2206 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2207 	if (!vma_filesz)
2208 		goto end_coredump;
2209 
2210 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2211 			vma = next_vma(vma, gate_vma)) {
2212 		unsigned long dump_size;
2213 
2214 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2215 		vma_filesz[i++] = dump_size;
2216 		vma_data_size += dump_size;
2217 	}
2218 
2219 	offset += vma_data_size;
2220 	offset += elf_core_extra_data_size();
2221 	e_shoff = offset;
2222 
2223 	if (e_phnum == PN_XNUM) {
2224 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2225 		if (!shdr4extnum)
2226 			goto end_coredump;
2227 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2228 	}
2229 
2230 	offset = dataoff;
2231 
2232 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2233 		goto end_coredump;
2234 
2235 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2236 		goto end_coredump;
2237 
2238 	/* Write program headers for segments dump */
2239 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2240 			vma = next_vma(vma, gate_vma)) {
2241 		struct elf_phdr phdr;
2242 
2243 		phdr.p_type = PT_LOAD;
2244 		phdr.p_offset = offset;
2245 		phdr.p_vaddr = vma->vm_start;
2246 		phdr.p_paddr = 0;
2247 		phdr.p_filesz = vma_filesz[i++];
2248 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2249 		offset += phdr.p_filesz;
2250 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2251 		if (vma->vm_flags & VM_WRITE)
2252 			phdr.p_flags |= PF_W;
2253 		if (vma->vm_flags & VM_EXEC)
2254 			phdr.p_flags |= PF_X;
2255 		phdr.p_align = ELF_EXEC_PAGESIZE;
2256 
2257 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2258 			goto end_coredump;
2259 	}
2260 
2261 	if (!elf_core_write_extra_phdrs(cprm, offset))
2262 		goto end_coredump;
2263 
2264  	/* write out the notes section */
2265 	if (!write_note_info(&info, cprm))
2266 		goto end_coredump;
2267 
2268 	if (elf_coredump_extra_notes_write(cprm))
2269 		goto end_coredump;
2270 
2271 	/* Align to page */
2272 	if (!dump_skip(cprm, dataoff - cprm->written))
2273 		goto end_coredump;
2274 
2275 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2276 			vma = next_vma(vma, gate_vma)) {
2277 		unsigned long addr;
2278 		unsigned long end;
2279 
2280 		end = vma->vm_start + vma_filesz[i++];
2281 
2282 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2283 			struct page *page;
2284 			int stop;
2285 
2286 			page = get_dump_page(addr);
2287 			if (page) {
2288 				void *kaddr = kmap(page);
2289 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2290 				kunmap(page);
2291 				page_cache_release(page);
2292 			} else
2293 				stop = !dump_skip(cprm, PAGE_SIZE);
2294 			if (stop)
2295 				goto end_coredump;
2296 		}
2297 	}
2298 
2299 	if (!elf_core_write_extra_data(cprm))
2300 		goto end_coredump;
2301 
2302 	if (e_phnum == PN_XNUM) {
2303 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2304 			goto end_coredump;
2305 	}
2306 
2307 end_coredump:
2308 	set_fs(fs);
2309 
2310 cleanup:
2311 	free_note_info(&info);
2312 	kfree(shdr4extnum);
2313 	kfree(vma_filesz);
2314 	kfree(phdr4note);
2315 	kfree(elf);
2316 out:
2317 	return has_dumped;
2318 }
2319 
2320 #endif		/* CONFIG_ELF_CORE */
2321 
2322 static int __init init_elf_binfmt(void)
2323 {
2324 	register_binfmt(&elf_format);
2325 	return 0;
2326 }
2327 
2328 static void __exit exit_elf_binfmt(void)
2329 {
2330 	/* Remove the COFF and ELF loaders. */
2331 	unregister_binfmt(&elf_format);
2332 }
2333 
2334 core_initcall(init_elf_binfmt);
2335 module_exit(exit_elf_binfmt);
2336 MODULE_LICENSE("GPL");
2337