xref: /openbmc/linux/fs/binfmt_elf.c (revision 174cd4b1)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
7  * Support Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/cred.h>
40 #include <linux/dax.h>
41 #include <linux/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 #ifndef user_long_t
46 #define user_long_t long
47 #endif
48 #ifndef user_siginfo_t
49 #define user_siginfo_t siginfo_t
50 #endif
51 
52 static int load_elf_binary(struct linux_binprm *bprm);
53 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
54 				int, int, unsigned long);
55 
56 #ifdef CONFIG_USELIB
57 static int load_elf_library(struct file *);
58 #else
59 #define load_elf_library NULL
60 #endif
61 
62 /*
63  * If we don't support core dumping, then supply a NULL so we
64  * don't even try.
65  */
66 #ifdef CONFIG_ELF_CORE
67 static int elf_core_dump(struct coredump_params *cprm);
68 #else
69 #define elf_core_dump	NULL
70 #endif
71 
72 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
73 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
74 #else
75 #define ELF_MIN_ALIGN	PAGE_SIZE
76 #endif
77 
78 #ifndef ELF_CORE_EFLAGS
79 #define ELF_CORE_EFLAGS	0
80 #endif
81 
82 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
83 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
84 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
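
/*
 * Editor's note - a worked example of the macros above, assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x12345)  == 0x12000	(round down to page start)
 *	ELF_PAGEOFFSET(0x12345) == 0x345	(offset within the page)
 *	ELF_PAGEALIGN(0x12345)  == 0x13000	(round up to the next page)
 *
 * These exist alongside the PAGE_* macros because ELF segments may be
 * aligned to ELF_EXEC_PAGESIZE, which can be larger than PAGE_SIZE.
 */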
85 
86 static struct linux_binfmt elf_format = {
87 	.module		= THIS_MODULE,
88 	.load_binary	= load_elf_binary,
89 	.load_shlib	= load_elf_library,
90 	.core_dump	= elf_core_dump,
91 	.min_coredump	= ELF_EXEC_PAGESIZE,
92 };
93 
94 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
95 
96 static int set_brk(unsigned long start, unsigned long end, int prot)
97 {
98 	start = ELF_PAGEALIGN(start);
99 	end = ELF_PAGEALIGN(end);
100 	if (end > start) {
101 		/*
102 		 * Map the last of the bss segment.
103 		 * If the header is requesting these pages to be
104 		 * executable, honour that (ppc32 needs this).
105 		 */
106 		int error = vm_brk_flags(start, end - start,
107 				prot & PROT_EXEC ? VM_EXEC : 0);
108 		if (error)
109 			return error;
110 	}
111 	current->mm->start_brk = current->mm->brk = end;
112 	return 0;
113 }
114 
115 /* We need to explicitly zero any fractional pages
116    after the data section (i.e. bss).  These would
117    otherwise contain junk from the file that should
118    not be in memory.
119  */
120 static int padzero(unsigned long elf_bss)
121 {
122 	unsigned long nbyte;
123 
124 	nbyte = ELF_PAGEOFFSET(elf_bss);
125 	if (nbyte) {
126 		nbyte = ELF_MIN_ALIGN - nbyte;
127 		if (clear_user((void __user *) elf_bss, nbyte))
128 			return -EFAULT;
129 	}
130 	return 0;
131 }
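
/*
 * Editor's note - a worked padzero() example, assuming ELF_MIN_ALIGN ==
 * 4096: for elf_bss == 0x804a123, ELF_PAGEOFFSET(elf_bss) == 0x123, so
 * the 0x1000 - 0x123 == 0xedd bytes up to 0x804b000 are cleared with
 * clear_user(). If elf_bss is already page-aligned, nothing is cleared.
 */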
132 
133 /* Let's use some macros to make this stack manipulation a little clearer */
134 #ifdef CONFIG_STACK_GROWSUP
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
136 #define STACK_ROUND(sp, items) \
137 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ \
139 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
140 	old_sp; })
141 #else
142 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
143 #define STACK_ROUND(sp, items) \
144 	(((unsigned long) (sp - items)) &~ 15UL)
145 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
146 #endif
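
/*
 * Editor's note - how these macros behave in the common grows-down case:
 * STACK_ALLOC(p, len) moves p down by len bytes and yields the new value,
 * STACK_ADD(sp, items) moves an elf_addr_t pointer down by that many
 * entries, and STACK_ROUND(sp, items) additionally masks the result to a
 * 16-byte boundary, as most ABIs require of the initial stack pointer.
 */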
147 
148 #ifndef ELF_BASE_PLATFORM
149 /*
150  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
151  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
152  * will be copied to the user stack in the same manner as AT_PLATFORM.
153  */
154 #define ELF_BASE_PLATFORM NULL
155 #endif
156 
157 static int
158 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
159 		unsigned long load_addr, unsigned long interp_load_addr)
160 {
161 	unsigned long p = bprm->p;
162 	int argc = bprm->argc;
163 	int envc = bprm->envc;
164 	elf_addr_t __user *argv;
165 	elf_addr_t __user *envp;
166 	elf_addr_t __user *sp;
167 	elf_addr_t __user *u_platform;
168 	elf_addr_t __user *u_base_platform;
169 	elf_addr_t __user *u_rand_bytes;
170 	const char *k_platform = ELF_PLATFORM;
171 	const char *k_base_platform = ELF_BASE_PLATFORM;
172 	unsigned char k_rand_bytes[16];
173 	int items;
174 	elf_addr_t *elf_info;
175 	int ei_index = 0;
176 	const struct cred *cred = current_cred();
177 	struct vm_area_struct *vma;
178 
179 	/*
180 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
181 	 * evictions by the processes running on the same package. One
182 	 * thing we can do is to shuffle the initial stack for them.
183 	 */
184 
185 	p = arch_align_stack(p);
186 
187 	/*
188 	 * If this architecture has a platform capability string, copy it
189 	 * to userspace.  In some cases (Sparc), this info is impossible
190 	 * for userspace to get any other way, in others (i386) it is
191 	 * merely difficult.
192 	 */
193 	u_platform = NULL;
194 	if (k_platform) {
195 		size_t len = strlen(k_platform) + 1;
196 
197 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
198 		if (__copy_to_user(u_platform, k_platform, len))
199 			return -EFAULT;
200 	}
201 
202 	/*
203 	 * If this architecture has a "base" platform capability
204 	 * string, copy it to userspace.
205 	 */
206 	u_base_platform = NULL;
207 	if (k_base_platform) {
208 		size_t len = strlen(k_base_platform) + 1;
209 
210 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
211 		if (__copy_to_user(u_base_platform, k_base_platform, len))
212 			return -EFAULT;
213 	}
214 
215 	/*
216 	 * Generate 16 random bytes for userspace PRNG seeding.
217 	 */
218 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
219 	u_rand_bytes = (elf_addr_t __user *)
220 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
221 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
222 		return -EFAULT;
223 
224 	/* Create the ELF interpreter info */
225 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
226 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
227 #define NEW_AUX_ENT(id, val) \
228 	do { \
229 		elf_info[ei_index++] = id; \
230 		elf_info[ei_index++] = val; \
231 	} while (0)
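
	/*
	 * Editor's note: each NEW_AUX_ENT() stores one {id, value} pair,
	 * i.e. two elf_addr_t slots in saved_auxv, which is why
	 * AT_VECTOR_SIZE_BASE must cover every unconditional entry below
	 * plus the terminating AT_NULL pair.
	 */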
232 
233 #ifdef ARCH_DLINFO
234 	/*
235 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
236 	 * AUXV.
237 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
238 	 * ARCH_DLINFO changes
239 	 */
240 	ARCH_DLINFO;
241 #endif
242 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
243 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
244 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
245 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
246 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
247 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
248 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
249 	NEW_AUX_ENT(AT_FLAGS, 0);
250 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
251 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
252 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
253 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
254 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
255 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
256 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
257 #ifdef ELF_HWCAP2
258 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
259 #endif
260 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
261 	if (k_platform) {
262 		NEW_AUX_ENT(AT_PLATFORM,
263 			    (elf_addr_t)(unsigned long)u_platform);
264 	}
265 	if (k_base_platform) {
266 		NEW_AUX_ENT(AT_BASE_PLATFORM,
267 			    (elf_addr_t)(unsigned long)u_base_platform);
268 	}
269 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
270 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
271 	}
272 #undef NEW_AUX_ENT
273 	/* AT_NULL is zero; clear the rest too */
274 	memset(&elf_info[ei_index], 0,
275 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
276 
277 	/* And advance past the AT_NULL entry.  */
278 	ei_index += 2;
279 
280 	sp = STACK_ADD(p, ei_index);
281 
282 	items = (argc + 1) + (envc + 1) + 1;
283 	bprm->p = STACK_ROUND(sp, items);
284 
285 	/* Point sp at the lowest address on the stack */
286 #ifdef CONFIG_STACK_GROWSUP
287 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
288 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
289 #else
290 	sp = (elf_addr_t __user *)bprm->p;
291 #endif
292 
293 
294 	/*
295 	 * Grow the stack manually; some architectures have a limit on how
296 	 * far ahead a user-space access may be in order to grow the stack.
297 	 */
298 	vma = find_extend_vma(current->mm, bprm->p);
299 	if (!vma)
300 		return -EFAULT;
301 
302 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
303 	if (__put_user(argc, sp++))
304 		return -EFAULT;
305 	argv = sp;
306 	envp = argv + argc + 1;
307 
308 	/* Populate argv and envp */
309 	p = current->mm->arg_end = current->mm->arg_start;
310 	while (argc-- > 0) {
311 		size_t len;
312 		if (__put_user((elf_addr_t)p, argv++))
313 			return -EFAULT;
314 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
315 		if (!len || len > MAX_ARG_STRLEN)
316 			return -EINVAL;
317 		p += len;
318 	}
319 	if (__put_user(0, argv))
320 		return -EFAULT;
321 	current->mm->arg_end = current->mm->env_start = p;
322 	while (envc-- > 0) {
323 		size_t len;
324 		if (__put_user((elf_addr_t)p, envp++))
325 			return -EFAULT;
326 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
327 		if (!len || len > MAX_ARG_STRLEN)
328 			return -EINVAL;
329 		p += len;
330 	}
331 	if (__put_user(0, envp))
332 		return -EFAULT;
333 	current->mm->env_end = p;
334 
335 	/* Put the elf_info on the stack in the right place.  */
336 	sp = (elf_addr_t __user *)envp + 1;
337 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
338 		return -EFAULT;
339 	return 0;
340 }
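
/*
 * Editor's note - after create_elf_tables() the new stack looks roughly
 * like this (grows-down case, lowest address first):
 *
 *	sp ->	argc
 *		argv[0] ... argv[argc - 1], NULL
 *		envp[0] ... envp[envc - 1], NULL
 *		auxv {AT_* id, value} pairs, terminated by AT_NULL
 *		... then, higher up, the random bytes, platform strings
 *		and the actual argument/environment strings
 *
 * which is the System V ABI startup layout that ld.so and libc expect.
 */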
341 
342 #ifndef elf_map
343 
344 static unsigned long elf_map(struct file *filep, unsigned long addr,
345 		struct elf_phdr *eppnt, int prot, int type,
346 		unsigned long total_size)
347 {
348 	unsigned long map_addr;
349 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
350 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
351 	addr = ELF_PAGESTART(addr);
352 	size = ELF_PAGEALIGN(size);
353 
354 	/* mmap() will return -EINVAL if given a zero size, but a
355 	 * segment with zero file size is perfectly valid */
356 	if (!size)
357 		return addr;
358 
359 	/*
360 	 * total_size is the size of the ELF (interpreter) image.
361 	 * The _first_ mmap needs to know the full size, otherwise
362 	 * randomization might put this image into an overlapping
363 	 * position with the ELF binary image (since size < total_size).
364 	 * So we first map the 'big' image - and unmap the remainder at
365 	 * the end (the unmap is needed for ELF images with holes).
366 	 */
367 	if (total_size) {
368 		total_size = ELF_PAGEALIGN(total_size);
369 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
370 		if (!BAD_ADDR(map_addr))
371 			vm_munmap(map_addr+size, total_size-size);
372 	} else
373 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
374 
375 	return map_addr;
376 }
377 
378 #endif /* !elf_map */
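
/*
 * Editor's note: elf_map() relies on the ELF rule that a loadable
 * segment's p_vaddr and p_offset are congruent modulo the page size
 * (e.g. p_vaddr == 0x400123 with p_offset == 0x123 on 4K pages), so
 * subtracting ELF_PAGEOFFSET(p_vaddr) from both yields a page-aligned
 * address and file offset suitable for vm_mmap().
 */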
379 
380 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
381 {
382 	int i, first_idx = -1, last_idx = -1;
383 
384 	for (i = 0; i < nr; i++) {
385 		if (cmds[i].p_type == PT_LOAD) {
386 			last_idx = i;
387 			if (first_idx == -1)
388 				first_idx = i;
389 		}
390 	}
391 	if (first_idx == -1)
392 		return 0;
393 
394 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
395 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
396 }
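
/*
 * Editor's note - a worked total_mapping_size() example with two PT_LOAD
 * entries and 4K pages:
 *
 *	first: p_vaddr == 0x400000, p_memsz == 0x1000
 *	last:  p_vaddr == 0x600e00, p_memsz == 0x300
 *
 * gives 0x600e00 + 0x300 - ELF_PAGESTART(0x400000) == 0x201100, i.e.
 * the whole span the image will occupy, holes included.
 */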
397 
398 /**
399  * load_elf_phdrs() - load ELF program headers
400  * @elf_ex:   ELF header of the binary whose program headers should be loaded
401  * @elf_file: the opened ELF binary file
402  *
403  * Loads ELF program headers from the binary file elf_file, which has the ELF
404  * header pointed to by elf_ex, into a newly allocated array. The caller is
405  * responsible for freeing the allocated data. Returns NULL upon failure.
406  */
407 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
408 				       struct file *elf_file)
409 {
410 	struct elf_phdr *elf_phdata = NULL;
411 	int retval, size, err = -1;
412 
413 	/*
414 	 * If the size of this structure has changed, then punt, since
415 	 * we will be doing the wrong thing.
416 	 */
417 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
418 		goto out;
419 
420 	/* Sanity check the number of program headers... */
421 	if (elf_ex->e_phnum < 1 ||
422 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
423 		goto out;
424 
425 	/* ...and their total size. */
426 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
427 	if (size > ELF_MIN_ALIGN)
428 		goto out;
429 
430 	elf_phdata = kmalloc(size, GFP_KERNEL);
431 	if (!elf_phdata)
432 		goto out;
433 
434 	/* Read in the program headers */
435 	retval = kernel_read(elf_file, elf_ex->e_phoff,
436 			     (char *)elf_phdata, size);
437 	if (retval != size) {
438 		err = (retval < 0) ? retval : -EIO;
439 		goto out;
440 	}
441 
442 	/* Success! */
443 	err = 0;
444 out:
445 	if (err) {
446 		kfree(elf_phdata);
447 		elf_phdata = NULL;
448 	}
449 	return elf_phdata;
450 }
451 
452 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
453 
454 /**
455  * struct arch_elf_state - arch-specific ELF loading state
456  *
457  * This structure is used to preserve architecture specific data during
458  * the loading of an ELF file, throughout the checking of architecture
459  * specific ELF headers & through to the point where the ELF load is
460  * known to be proceeding (i.e. SET_PERSONALITY).
461  *
462  * This implementation is a dummy for architectures which require no
463  * specific state.
464  */
465 struct arch_elf_state {
466 };
467 
468 #define INIT_ARCH_ELF_STATE {}
469 
470 /**
471  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
472  * @ehdr:	The main ELF header
473  * @phdr:	The program header to check
474  * @elf:	The open ELF file
475  * @is_interp:	True if the phdr is from the interpreter of the ELF being
476  *		loaded, else false.
477  * @state:	Architecture-specific state preserved throughout the process
478  *		of loading the ELF.
479  *
480  * Inspects the program header phdr to validate its correctness and/or
481  * suitability for the system. Called once per ELF program header in the
482  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
483  * interpreter.
484  *
485  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
486  *         with that return code.
487  */
488 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
489 				   struct elf_phdr *phdr,
490 				   struct file *elf, bool is_interp,
491 				   struct arch_elf_state *state)
492 {
493 	/* Dummy implementation, always proceed */
494 	return 0;
495 }
496 
497 /**
498  * arch_check_elf() - check an ELF executable
499  * @ehdr:	The main ELF header
500  * @has_interp:	True if the ELF has an interpreter, else false.
501  * @interp_ehdr: The interpreter's ELF header
502  * @state:	Architecture-specific state preserved throughout the process
503  *		of loading the ELF.
504  *
505  * Provides a final opportunity for architecture code to reject the loading
506  * of the ELF & cause an exec syscall to return an error. This is called
507  * after every program header handled by arch_elf_pt_proc() has been checked.
508  *
509  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
510  *         with that return code.
511  */
512 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
513 				 struct elfhdr *interp_ehdr,
514 				 struct arch_elf_state *state)
515 {
516 	/* Dummy implementation, always proceed */
517 	return 0;
518 }
519 
520 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
521 
522 /* This is much more generalized than the library routine read function,
523    so we keep this separate.  Technically the library read function
524    is only provided so that we can read a.out libraries that have
525    an ELF header. */
526 
527 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
528 		struct file *interpreter, unsigned long *interp_map_addr,
529 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
530 {
531 	struct elf_phdr *eppnt;
532 	unsigned long load_addr = 0;
533 	int load_addr_set = 0;
534 	unsigned long last_bss = 0, elf_bss = 0;
535 	int bss_prot = 0;
536 	unsigned long error = ~0UL;
537 	unsigned long total_size;
538 	int i;
539 
540 	/* First of all, some simple consistency checks */
541 	if (interp_elf_ex->e_type != ET_EXEC &&
542 	    interp_elf_ex->e_type != ET_DYN)
543 		goto out;
544 	if (!elf_check_arch(interp_elf_ex))
545 		goto out;
546 	if (!interpreter->f_op->mmap)
547 		goto out;
548 
549 	total_size = total_mapping_size(interp_elf_phdata,
550 					interp_elf_ex->e_phnum);
551 	if (!total_size) {
552 		error = -EINVAL;
553 		goto out;
554 	}
555 
556 	eppnt = interp_elf_phdata;
557 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
558 		if (eppnt->p_type == PT_LOAD) {
559 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
560 			int elf_prot = 0;
561 			unsigned long vaddr = 0;
562 			unsigned long k, map_addr;
563 
564 			if (eppnt->p_flags & PF_R)
565 				elf_prot = PROT_READ;
566 			if (eppnt->p_flags & PF_W)
567 				elf_prot |= PROT_WRITE;
568 			if (eppnt->p_flags & PF_X)
569 				elf_prot |= PROT_EXEC;
570 			vaddr = eppnt->p_vaddr;
571 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
572 				elf_type |= MAP_FIXED;
573 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
574 				load_addr = -vaddr;
575 
576 			map_addr = elf_map(interpreter, load_addr + vaddr,
577 					eppnt, elf_prot, elf_type, total_size);
578 			total_size = 0;
579 			if (!*interp_map_addr)
580 				*interp_map_addr = map_addr;
581 			error = map_addr;
582 			if (BAD_ADDR(map_addr))
583 				goto out;
584 
585 			if (!load_addr_set &&
586 			    interp_elf_ex->e_type == ET_DYN) {
587 				load_addr = map_addr - ELF_PAGESTART(vaddr);
588 				load_addr_set = 1;
589 			}
590 
591 			/*
592 			 * Check to see if the segment's size will overflow the
593 			 * allowed task size. Note that p_filesz must always be
594 			 * <= p_memsz, so it's only necessary to check p_memsz.
595 			 */
596 			k = load_addr + eppnt->p_vaddr;
597 			if (BAD_ADDR(k) ||
598 			    eppnt->p_filesz > eppnt->p_memsz ||
599 			    eppnt->p_memsz > TASK_SIZE ||
600 			    TASK_SIZE - eppnt->p_memsz < k) {
601 				error = -ENOMEM;
602 				goto out;
603 			}
604 
605 			/*
606 			 * Find the end of the file mapping for this phdr, and
607 			 * keep track of the largest address we see for this.
608 			 */
609 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
610 			if (k > elf_bss)
611 				elf_bss = k;
612 
613 			/*
614 			 * Do the same thing for the memory mapping - between
615 			 * elf_bss and last_bss is the bss section.
616 			 */
617 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
618 			if (k > last_bss) {
619 				last_bss = k;
620 				bss_prot = elf_prot;
621 			}
622 		}
623 	}
624 
625 	/*
626 	 * Now fill out the bss section: first pad the last page from
627 	 * the file up to the page boundary, and zero it from elf_bss
628 	 * up to the end of the page.
629 	 */
630 	if (padzero(elf_bss)) {
631 		error = -EFAULT;
632 		goto out;
633 	}
634 	/*
635 	 * Next, align both the file and mem bss up to the page size,
636 	 * since this is where elf_bss was just zeroed up to, and where
637 	 * last_bss will end after the vm_brk_flags() below.
638 	 */
639 	elf_bss = ELF_PAGEALIGN(elf_bss);
640 	last_bss = ELF_PAGEALIGN(last_bss);
641 	/* Finally, if there is still more bss to allocate, do it. */
642 	if (last_bss > elf_bss) {
643 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
644 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
645 		if (error)
646 			goto out;
647 	}
648 
649 	error = load_addr;
650 out:
651 	return error;
652 }
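
/*
 * Editor's note: on success load_elf_interp() returns the interpreter's
 * load address (the relocation applied to it), not its entry point; the
 * caller adds interp_elf_ex->e_entry itself.
 */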
653 
654 /*
655  * These are the functions used to load ELF style executables and shared
656  * libraries.  There is no binary dependent code anywhere else.
657  */
658 
659 #ifndef STACK_RND_MASK
660 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
661 #endif
662 
663 static unsigned long randomize_stack_top(unsigned long stack_top)
664 {
665 	unsigned long random_variable = 0;
666 
667 	if ((current->flags & PF_RANDOMIZE) &&
668 		!(current->personality & ADDR_NO_RANDOMIZE)) {
669 		random_variable = get_random_long();
670 		random_variable &= STACK_RND_MASK;
671 		random_variable <<= PAGE_SHIFT;
672 	}
673 #ifdef CONFIG_STACK_GROWSUP
674 	return PAGE_ALIGN(stack_top) + random_variable;
675 #else
676 	return PAGE_ALIGN(stack_top) - random_variable;
677 #endif
678 }
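
/*
 * Editor's note: with the default STACK_RND_MASK and 4K pages,
 * random_variable is at most 0x7ff << 12 == 0x7ff000 bytes, so the stack
 * top moves by just under 8MB, always in whole pages.
 */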
679 
680 static int load_elf_binary(struct linux_binprm *bprm)
681 {
682 	struct file *interpreter = NULL; /* to shut gcc up */
683 	unsigned long load_addr = 0, load_bias = 0;
684 	int load_addr_set = 0;
685 	char * elf_interpreter = NULL;
686 	unsigned long error;
687 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
688 	unsigned long elf_bss, elf_brk;
689 	int bss_prot = 0;
690 	int retval, i;
691 	unsigned long elf_entry;
692 	unsigned long interp_load_addr = 0;
693 	unsigned long start_code, end_code, start_data, end_data;
694 	unsigned long reloc_func_desc __maybe_unused = 0;
695 	int executable_stack = EXSTACK_DEFAULT;
696 	struct pt_regs *regs = current_pt_regs();
697 	struct {
698 		struct elfhdr elf_ex;
699 		struct elfhdr interp_elf_ex;
700 	} *loc;
701 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
702 
703 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
704 	if (!loc) {
705 		retval = -ENOMEM;
706 		goto out_ret;
707 	}
708 
709 	/* Get the exec-header */
710 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
711 
712 	retval = -ENOEXEC;
713 	/* First of all, some simple consistency checks */
714 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 		goto out;
716 
717 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
718 		goto out;
719 	if (!elf_check_arch(&loc->elf_ex))
720 		goto out;
721 	if (!bprm->file->f_op->mmap)
722 		goto out;
723 
724 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
725 	if (!elf_phdata)
726 		goto out;
727 
728 	elf_ppnt = elf_phdata;
729 	elf_bss = 0;
730 	elf_brk = 0;
731 
732 	start_code = ~0UL;
733 	end_code = 0;
734 	start_data = 0;
735 	end_data = 0;
736 
737 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
738 		if (elf_ppnt->p_type == PT_INTERP) {
739 			/* This is the program interpreter used for
740 			 * shared libraries - for now assume that this
741 			 * is an a.out format binary
742 			 */
743 			retval = -ENOEXEC;
744 			if (elf_ppnt->p_filesz > PATH_MAX ||
745 			    elf_ppnt->p_filesz < 2)
746 				goto out_free_ph;
747 
748 			retval = -ENOMEM;
749 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
750 						  GFP_KERNEL);
751 			if (!elf_interpreter)
752 				goto out_free_ph;
753 
754 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
755 					     elf_interpreter,
756 					     elf_ppnt->p_filesz);
757 			if (retval != elf_ppnt->p_filesz) {
758 				if (retval >= 0)
759 					retval = -EIO;
760 				goto out_free_interp;
761 			}
762 			/* make sure the path is NUL-terminated */
763 			retval = -ENOEXEC;
764 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
765 				goto out_free_interp;
766 
767 			interpreter = open_exec(elf_interpreter);
768 			retval = PTR_ERR(interpreter);
769 			if (IS_ERR(interpreter))
770 				goto out_free_interp;
771 
772 			/*
773 			 * If the binary is not readable then enforce
774 			 * mm->dumpable = 0 regardless of the interpreter's
775 			 * permissions.
776 			 */
777 			would_dump(bprm, interpreter);
778 
779 			/* Get the exec headers */
780 			retval = kernel_read(interpreter, 0,
781 					     (void *)&loc->interp_elf_ex,
782 					     sizeof(loc->interp_elf_ex));
783 			if (retval != sizeof(loc->interp_elf_ex)) {
784 				if (retval >= 0)
785 					retval = -EIO;
786 				goto out_free_dentry;
787 			}
788 
789 			break;
790 		}
791 		elf_ppnt++;
792 	}
793 
794 	elf_ppnt = elf_phdata;
795 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
796 		switch (elf_ppnt->p_type) {
797 		case PT_GNU_STACK:
798 			if (elf_ppnt->p_flags & PF_X)
799 				executable_stack = EXSTACK_ENABLE_X;
800 			else
801 				executable_stack = EXSTACK_DISABLE_X;
802 			break;
803 
804 		case PT_LOPROC ... PT_HIPROC:
805 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
806 						  bprm->file, false,
807 						  &arch_state);
808 			if (retval)
809 				goto out_free_dentry;
810 			break;
811 		}
812 
813 	/* Some simple consistency checks for the interpreter */
814 	if (elf_interpreter) {
815 		retval = -ELIBBAD;
816 		/* Not an ELF interpreter */
817 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
818 			goto out_free_dentry;
819 		/* Verify the interpreter has a valid arch */
820 		if (!elf_check_arch(&loc->interp_elf_ex))
821 			goto out_free_dentry;
822 
823 		/* Load the interpreter program headers */
824 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
825 						   interpreter);
826 		if (!interp_elf_phdata)
827 			goto out_free_dentry;
828 
829 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
830 		elf_ppnt = interp_elf_phdata;
831 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
832 			switch (elf_ppnt->p_type) {
833 			case PT_LOPROC ... PT_HIPROC:
834 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
835 							  elf_ppnt, interpreter,
836 							  true, &arch_state);
837 				if (retval)
838 					goto out_free_dentry;
839 				break;
840 			}
841 	}
842 
843 	/*
844 	 * Allow arch code to reject the ELF at this point, whilst it's
845 	 * still possible to return an error to the code that invoked
846 	 * the exec syscall.
847 	 */
848 	retval = arch_check_elf(&loc->elf_ex,
849 				!!interpreter, &loc->interp_elf_ex,
850 				&arch_state);
851 	if (retval)
852 		goto out_free_dentry;
853 
854 	/* Flush all traces of the currently running executable */
855 	retval = flush_old_exec(bprm);
856 	if (retval)
857 		goto out_free_dentry;
858 
859 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
860 	   may depend on the personality.  */
861 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
862 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
863 		current->personality |= READ_IMPLIES_EXEC;
864 
865 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
866 		current->flags |= PF_RANDOMIZE;
867 
868 	setup_new_exec(bprm);
869 	install_exec_creds(bprm);
870 
871 	/* Do this so that we can load the interpreter, if need be.  We will
872 	   change some of these later */
873 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
874 				 executable_stack);
875 	if (retval < 0)
876 		goto out_free_dentry;
877 
878 	current->mm->start_stack = bprm->p;
879 
880 	/* Now we do a little grungy work by mmapping the ELF image into
881 	   the correct location in memory. */
882 	for(i = 0, elf_ppnt = elf_phdata;
883 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
884 		int elf_prot = 0, elf_flags;
885 		unsigned long k, vaddr;
886 		unsigned long total_size = 0;
887 
888 		if (elf_ppnt->p_type != PT_LOAD)
889 			continue;
890 
891 		if (unlikely(elf_brk > elf_bss)) {
892 			unsigned long nbyte;
893 
894 			/* There was a PT_LOAD segment with p_memsz > p_filesz
895 			   before this one. Map anonymous pages, if needed,
896 			   and clear the area.  */
897 			retval = set_brk(elf_bss + load_bias,
898 					 elf_brk + load_bias,
899 					 bss_prot);
900 			if (retval)
901 				goto out_free_dentry;
902 			nbyte = ELF_PAGEOFFSET(elf_bss);
903 			if (nbyte) {
904 				nbyte = ELF_MIN_ALIGN - nbyte;
905 				if (nbyte > elf_brk - elf_bss)
906 					nbyte = elf_brk - elf_bss;
907 				if (clear_user((void __user *)elf_bss +
908 							load_bias, nbyte)) {
909 					/*
910 					 * This bss-zeroing can fail if the ELF
911 					 * file specifies odd protections. So
912 					 * we don't check the return value.
913 					 */
914 				}
915 			}
916 		}
917 
918 		if (elf_ppnt->p_flags & PF_R)
919 			elf_prot |= PROT_READ;
920 		if (elf_ppnt->p_flags & PF_W)
921 			elf_prot |= PROT_WRITE;
922 		if (elf_ppnt->p_flags & PF_X)
923 			elf_prot |= PROT_EXEC;
924 
925 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
926 
927 		vaddr = elf_ppnt->p_vaddr;
928 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
929 			elf_flags |= MAP_FIXED;
930 		} else if (loc->elf_ex.e_type == ET_DYN) {
931 			/* Try and get dynamic programs out of the way of the
932 			 * default mmap base, as well as whatever program they
933 			 * might try to exec.  This is because the brk will
934 			 * follow the loader, and is not movable.  */
935 			load_bias = ELF_ET_DYN_BASE - vaddr;
936 			if (current->flags & PF_RANDOMIZE)
937 				load_bias += arch_mmap_rnd();
938 			load_bias = ELF_PAGESTART(load_bias);
939 			total_size = total_mapping_size(elf_phdata,
940 							loc->elf_ex.e_phnum);
941 			if (!total_size) {
942 				retval = -EINVAL;
943 				goto out_free_dentry;
944 			}
945 		}
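
		/*
		 * Editor's note: for a PIE whose first p_vaddr is 0,
		 * load_bias starts at ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2
		 * on x86) and, when PF_RANDOMIZE is set, gains a random,
		 * page-aligned offset from arch_mmap_rnd().
		 */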
946 
947 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
948 				elf_prot, elf_flags, total_size);
949 		if (BAD_ADDR(error)) {
950 			retval = IS_ERR((void *)error) ?
951 				PTR_ERR((void*)error) : -EINVAL;
952 			goto out_free_dentry;
953 		}
954 
955 		if (!load_addr_set) {
956 			load_addr_set = 1;
957 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
958 			if (loc->elf_ex.e_type == ET_DYN) {
959 				load_bias += error -
960 				             ELF_PAGESTART(load_bias + vaddr);
961 				load_addr += load_bias;
962 				reloc_func_desc = load_bias;
963 			}
964 		}
965 		k = elf_ppnt->p_vaddr;
966 		if (k < start_code)
967 			start_code = k;
968 		if (start_data < k)
969 			start_data = k;
970 
971 		/*
972 		 * Check to see if the segment's size will overflow the
973 		 * allowed task size. Note that p_filesz must always be
974 		 * <= p_memsz, so it is only necessary to check p_memsz.
975 		 */
976 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
977 		    elf_ppnt->p_memsz > TASK_SIZE ||
978 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
979 			/* set_brk can never work. Avoid overflows. */
980 			retval = -EINVAL;
981 			goto out_free_dentry;
982 		}
983 
984 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
985 
986 		if (k > elf_bss)
987 			elf_bss = k;
988 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
989 			end_code = k;
990 		if (end_data < k)
991 			end_data = k;
992 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
993 		if (k > elf_brk) {
994 			bss_prot = elf_prot;
995 			elf_brk = k;
996 		}
997 	}
998 
999 	loc->elf_ex.e_entry += load_bias;
1000 	elf_bss += load_bias;
1001 	elf_brk += load_bias;
1002 	start_code += load_bias;
1003 	end_code += load_bias;
1004 	start_data += load_bias;
1005 	end_data += load_bias;
1006 
1007 	/* Calling set_brk effectively mmaps the pages that we need
1008 	 * for the bss and break sections.  We must do this before
1009 	 * mapping in the interpreter, to make sure it doesn't wind
1010 	 * up getting placed where the bss needs to go.
1011 	 */
1012 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1013 	if (retval)
1014 		goto out_free_dentry;
1015 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1016 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1017 		goto out_free_dentry;
1018 	}
1019 
1020 	if (elf_interpreter) {
1021 		unsigned long interp_map_addr = 0;
1022 
1023 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1024 					    interpreter,
1025 					    &interp_map_addr,
1026 					    load_bias, interp_elf_phdata);
1027 		if (!IS_ERR((void *)elf_entry)) {
1028 			/*
1029 			 * load_elf_interp() returns the relocation
1030 			 * adjustment (the interpreter's load address)
1031 			 */
1032 			interp_load_addr = elf_entry;
1033 			elf_entry += loc->interp_elf_ex.e_entry;
1034 		}
1035 		if (BAD_ADDR(elf_entry)) {
1036 			retval = IS_ERR((void *)elf_entry) ?
1037 					(int)elf_entry : -EINVAL;
1038 			goto out_free_dentry;
1039 		}
1040 		reloc_func_desc = interp_load_addr;
1041 
1042 		allow_write_access(interpreter);
1043 		fput(interpreter);
1044 		kfree(elf_interpreter);
1045 	} else {
1046 		elf_entry = loc->elf_ex.e_entry;
1047 		if (BAD_ADDR(elf_entry)) {
1048 			retval = -EINVAL;
1049 			goto out_free_dentry;
1050 		}
1051 	}
1052 
1053 	kfree(interp_elf_phdata);
1054 	kfree(elf_phdata);
1055 
1056 	set_binfmt(&elf_format);
1057 
1058 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1059 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1060 	if (retval < 0)
1061 		goto out;
1062 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1063 
1064 	retval = create_elf_tables(bprm, &loc->elf_ex,
1065 			  load_addr, interp_load_addr);
1066 	if (retval < 0)
1067 		goto out;
1068 	/* N.B. passed_fileno might not be initialized? */
1069 	current->mm->end_code = end_code;
1070 	current->mm->start_code = start_code;
1071 	current->mm->start_data = start_data;
1072 	current->mm->end_data = end_data;
1073 	current->mm->start_stack = bprm->p;
1074 
1075 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1076 		current->mm->brk = current->mm->start_brk =
1077 			arch_randomize_brk(current->mm);
1078 #ifdef compat_brk_randomized
1079 		current->brk_randomized = 1;
1080 #endif
1081 	}
1082 
1083 	if (current->personality & MMAP_PAGE_ZERO) {
1084 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1085 		   and some applications "depend" upon this behavior.
1086 		   Since we do not have the power to recompile these, we
1087 		   emulate the SVr4 behavior. Sigh. */
1088 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1089 				MAP_FIXED | MAP_PRIVATE, 0);
1090 	}
1091 
1092 #ifdef ELF_PLAT_INIT
1093 	/*
1094 	 * The ABI may specify that certain registers be set up in special
1095 	 * ways (on i386, for example, %edx is the address of a DT_FINI
1096 	 * function).  In addition, it may also specify (e.g. PowerPC64 ELF)
1097 	 * that the e_entry field is the address of the function descriptor
1098 	 * for the startup routine, rather than the address of the startup
1099 	 * routine itself.  This macro performs whatever initialization of
1100 	 * the regs structure is required as well as any relocations to the
1101 	 * function descriptor entries when executing dynamically linked apps.
1102 	 */
1103 	ELF_PLAT_INIT(regs, reloc_func_desc);
1104 #endif
1105 
1106 	start_thread(regs, elf_entry, bprm->p);
1107 	retval = 0;
1108 out:
1109 	kfree(loc);
1110 out_ret:
1111 	return retval;
1112 
1113 	/* error cleanup */
1114 out_free_dentry:
1115 	kfree(interp_elf_phdata);
1116 	allow_write_access(interpreter);
1117 	if (interpreter)
1118 		fput(interpreter);
1119 out_free_interp:
1120 	kfree(elf_interpreter);
1121 out_free_ph:
1122 	kfree(elf_phdata);
1123 	goto out;
1124 }
1125 
1126 #ifdef CONFIG_USELIB
1127 /* This is really simpleminded and specialized - we are loading a
1128    library, via the old uselib() interface, that has an ELF header. */
1129 static int load_elf_library(struct file *file)
1130 {
1131 	struct elf_phdr *elf_phdata;
1132 	struct elf_phdr *eppnt;
1133 	unsigned long elf_bss, bss, len;
1134 	int retval, error, i, j;
1135 	struct elfhdr elf_ex;
1136 
1137 	error = -ENOEXEC;
1138 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1139 	if (retval != sizeof(elf_ex))
1140 		goto out;
1141 
1142 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1143 		goto out;
1144 
1145 	/* First of all, some simple consistency checks */
1146 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1147 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1148 		goto out;
1149 
1150 	/* Now read in all of the header information */
1151 
1152 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1153 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1154 
1155 	error = -ENOMEM;
1156 	elf_phdata = kmalloc(j, GFP_KERNEL);
1157 	if (!elf_phdata)
1158 		goto out;
1159 
1160 	eppnt = elf_phdata;
1161 	error = -ENOEXEC;
1162 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1163 	if (retval != j)
1164 		goto out_free_ph;
1165 
1166 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1167 		if ((eppnt + i)->p_type == PT_LOAD)
1168 			j++;
1169 	if (j != 1)
1170 		goto out_free_ph;
1171 
1172 	while (eppnt->p_type != PT_LOAD)
1173 		eppnt++;
1174 
1175 	/* Now use mmap to map the library into memory. */
1176 	error = vm_mmap(file,
1177 			ELF_PAGESTART(eppnt->p_vaddr),
1178 			(eppnt->p_filesz +
1179 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1180 			PROT_READ | PROT_WRITE | PROT_EXEC,
1181 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1182 			(eppnt->p_offset -
1183 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1184 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1185 		goto out_free_ph;
1186 
1187 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1188 	if (padzero(elf_bss)) {
1189 		error = -EFAULT;
1190 		goto out_free_ph;
1191 	}
1192 
1193 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1194 			    ELF_MIN_ALIGN - 1);
1195 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1196 	if (bss > len) {
1197 		error = vm_brk(len, bss - len);
1198 		if (error)
1199 			goto out_free_ph;
1200 	}
1201 	error = 0;
1202 
1203 out_free_ph:
1204 	kfree(elf_phdata);
1205 out:
1206 	return error;
1207 }
1208 #endif /* #ifdef CONFIG_USELIB */
1209 
1210 #ifdef CONFIG_ELF_CORE
1211 /*
1212  * ELF core dumper
1213  *
1214  * Modelled on fs/exec.c:aout_core_dump()
1215  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1216  */
1217 
1218 /*
1219  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1220  * that are useful for post-mortem analysis are included in every core dump.
1221  * In that way we ensure that the core dump is fully interpretable later
1222  * without matching up the same kernel and hardware config to see what PC values
1223  * meant. These special mappings include the vDSO, vsyscall, and other
1224  * architecture-specific mappings.
1225  */
1226 static bool always_dump_vma(struct vm_area_struct *vma)
1227 {
1228 	/* Any vsyscall mappings? */
1229 	if (vma == get_gate_vma(vma->vm_mm))
1230 		return true;
1231 
1232 	/*
1233 	 * Assume that all vmas with a .name op should always be dumped.
1234 	 * If this changes, a new vm_ops field can easily be added.
1235 	 */
1236 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1237 		return true;
1238 
1239 	/*
1240 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1241 	 * such as vDSO sections.
1242 	 */
1243 	if (arch_vma_name(vma))
1244 		return true;
1245 
1246 	return false;
1247 }
1248 
1249 /*
1250  * Decide how much of a segment to dump: all of it, part of it, or none.
1251  */
1252 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1253 				   unsigned long mm_flags)
1254 {
1255 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1256 
1257 	/* always dump the vdso and vsyscall sections */
1258 	if (always_dump_vma(vma))
1259 		goto whole;
1260 
1261 	if (vma->vm_flags & VM_DONTDUMP)
1262 		return 0;
1263 
1264 	/* support for DAX */
1265 	if (vma_is_dax(vma)) {
1266 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1267 			goto whole;
1268 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1269 			goto whole;
1270 		return 0;
1271 	}
1272 
1273 	/* Hugetlb memory check */
1274 	if (vma->vm_flags & VM_HUGETLB) {
1275 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1276 			goto whole;
1277 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1278 			goto whole;
1279 		return 0;
1280 	}
1281 
1282 	/* Do not dump I/O mapped devices or special mappings */
1283 	if (vma->vm_flags & VM_IO)
1284 		return 0;
1285 
1286 	/* By default, dump shared memory if mapped from an anonymous file. */
1287 	if (vma->vm_flags & VM_SHARED) {
1288 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1289 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1290 			goto whole;
1291 		return 0;
1292 	}
1293 
1294 	/* Dump segments that have been written to.  */
1295 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1296 		goto whole;
1297 	if (vma->vm_file == NULL)
1298 		return 0;
1299 
1300 	if (FILTER(MAPPED_PRIVATE))
1301 		goto whole;
1302 
1303 	/*
1304 	 * If this looks like the beginning of a DSO or executable mapping,
1305 	 * check for an ELF header.  If we find one, dump the first page to
1306 	 * aid in determining what was mapped here.
1307 	 */
1308 	if (FILTER(ELF_HEADERS) &&
1309 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1310 		u32 __user *header = (u32 __user *) vma->vm_start;
1311 		u32 word;
1312 		mm_segment_t fs = get_fs();
1313 		/*
1314 		 * Doing it this way gets the constant folded by GCC.
1315 		 */
1316 		union {
1317 			u32 cmp;
1318 			char elfmag[SELFMAG];
1319 		} magic;
1320 		BUILD_BUG_ON(SELFMAG != sizeof word);
1321 		magic.elfmag[EI_MAG0] = ELFMAG0;
1322 		magic.elfmag[EI_MAG1] = ELFMAG1;
1323 		magic.elfmag[EI_MAG2] = ELFMAG2;
1324 		magic.elfmag[EI_MAG3] = ELFMAG3;
1325 		/*
1326 		 * Switch to the user "segment" for get_user(),
1327 		 * then put back what elf_core_dump() had in place.
1328 		 */
1329 		set_fs(USER_DS);
1330 		if (unlikely(get_user(word, header)))
1331 			word = 0;
1332 		set_fs(fs);
1333 		if (word == magic.cmp)
1334 			return PAGE_SIZE;
1335 	}
1336 
1337 #undef	FILTER
1338 
1339 	return 0;
1340 
1341 whole:
1342 	return vma->vm_end - vma->vm_start;
1343 }
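
/*
 * Editor's note: the FILTER() bits above correspond to the bits of
 * /proc/<pid>/coredump_filter; e.g. the common default of 0x33 (with
 * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y) dumps anonymous private and
 * shared memory, private hugetlb pages, and ELF headers, but skips
 * file-backed mappings.
 */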
1344 
1345 /* An ELF note in memory */
1346 struct memelfnote
1347 {
1348 	const char *name;
1349 	int type;
1350 	unsigned int datasz;
1351 	void *data;
1352 };
1353 
1354 static int notesize(struct memelfnote *en)
1355 {
1356 	int sz;
1357 
1358 	sz = sizeof(struct elf_note);
1359 	sz += roundup(strlen(en->name) + 1, 4);
1360 	sz += roundup(en->datasz, 4);
1361 
1362 	return sz;
1363 }
1364 
1365 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1366 {
1367 	struct elf_note en;
1368 	en.n_namesz = strlen(men->name) + 1;
1369 	en.n_descsz = men->datasz;
1370 	en.n_type = men->type;
1371 
1372 	return dump_emit(cprm, &en, sizeof(en)) &&
1373 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1374 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1375 }
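
/*
 * Editor's note - the record writenote() emits is the standard ELF note
 * layout that notesize() accounts for:
 *
 *	Elf_Nhdr { n_namesz, n_descsz, n_type }
 *	name ("CORE" or "LINUX", NUL-terminated), padded to 4 bytes
 *	desc (the payload),                       padded to 4 bytes
 */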
1376 
1377 static void fill_elf_header(struct elfhdr *elf, int segs,
1378 			    u16 machine, u32 flags)
1379 {
1380 	memset(elf, 0, sizeof(*elf));
1381 
1382 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1383 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1384 	elf->e_ident[EI_DATA] = ELF_DATA;
1385 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1386 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1387 
1388 	elf->e_type = ET_CORE;
1389 	elf->e_machine = machine;
1390 	elf->e_version = EV_CURRENT;
1391 	elf->e_phoff = sizeof(struct elfhdr);
1392 	elf->e_flags = flags;
1393 	elf->e_ehsize = sizeof(struct elfhdr);
1394 	elf->e_phentsize = sizeof(struct elf_phdr);
1395 	elf->e_phnum = segs;
1396 
1397 	return;
1398 }
1399 
1400 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1401 {
1402 	phdr->p_type = PT_NOTE;
1403 	phdr->p_offset = offset;
1404 	phdr->p_vaddr = 0;
1405 	phdr->p_paddr = 0;
1406 	phdr->p_filesz = sz;
1407 	phdr->p_memsz = 0;
1408 	phdr->p_flags = 0;
1409 	phdr->p_align = 0;
1410 	return;
1411 }
1412 
1413 static void fill_note(struct memelfnote *note, const char *name, int type,
1414 		unsigned int sz, void *data)
1415 {
1416 	note->name = name;
1417 	note->type = type;
1418 	note->datasz = sz;
1419 	note->data = data;
1420 	return;
1421 }
1422 
1423 /*
1424  * fill up all the fields in prstatus from the given task struct, except
1425  * registers which need to be filled up separately.
1426  */
1427 static void fill_prstatus(struct elf_prstatus *prstatus,
1428 		struct task_struct *p, long signr)
1429 {
1430 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1431 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1432 	prstatus->pr_sighold = p->blocked.sig[0];
1433 	rcu_read_lock();
1434 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1435 	rcu_read_unlock();
1436 	prstatus->pr_pid = task_pid_vnr(p);
1437 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1438 	prstatus->pr_sid = task_session_vnr(p);
1439 	if (thread_group_leader(p)) {
1440 		struct task_cputime cputime;
1441 
1442 		/*
1443 		 * This is the record for the group leader.  It shows the
1444 		 * group-wide total, not its individual thread total.
1445 		 */
1446 		thread_group_cputime(p, &cputime);
1447 		prstatus->pr_utime = ns_to_timeval(cputime.utime);
1448 		prstatus->pr_stime = ns_to_timeval(cputime.stime);
1449 	} else {
1450 		u64 utime, stime;
1451 
1452 		task_cputime(p, &utime, &stime);
1453 		prstatus->pr_utime = ns_to_timeval(utime);
1454 		prstatus->pr_stime = ns_to_timeval(stime);
1455 	}
1456 
1457 	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1458 	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1459 }
1460 
1461 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1462 		       struct mm_struct *mm)
1463 {
1464 	const struct cred *cred;
1465 	unsigned int i, len;
1466 
1467 	/* first copy the parameters from user space */
1468 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1469 
1470 	len = mm->arg_end - mm->arg_start;
1471 	if (len >= ELF_PRARGSZ)
1472 		len = ELF_PRARGSZ-1;
1473 	if (copy_from_user(&psinfo->pr_psargs,
1474 		           (const char __user *)mm->arg_start, len))
1475 		return -EFAULT;
1476 	for(i = 0; i < len; i++)
1477 		if (psinfo->pr_psargs[i] == 0)
1478 			psinfo->pr_psargs[i] = ' ';
1479 	psinfo->pr_psargs[len] = 0;
1480 
1481 	rcu_read_lock();
1482 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1483 	rcu_read_unlock();
1484 	psinfo->pr_pid = task_pid_vnr(p);
1485 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1486 	psinfo->pr_sid = task_session_vnr(p);
1487 
1488 	i = p->state ? ffz(~p->state) + 1 : 0;
1489 	psinfo->pr_state = i;
1490 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1491 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1492 	psinfo->pr_nice = task_nice(p);
1493 	psinfo->pr_flag = p->flags;
1494 	rcu_read_lock();
1495 	cred = __task_cred(p);
1496 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1497 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1498 	rcu_read_unlock();
1499 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1500 
1501 	return 0;
1502 }
1503 
1504 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1505 {
1506 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1507 	int i = 0;
1508 	do
1509 		i += 2;
1510 	while (auxv[i - 2] != AT_NULL);
1511 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1512 }
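
/*
 * Editor's note: saved_auxv was filled in by create_elf_tables() at exec
 * time; the loop above only measures up to and including the AT_NULL
 * terminator, so the dumped NT_AUXV note matches what the process saw.
 */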
1513 
1514 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1515 		const siginfo_t *siginfo)
1516 {
1517 	mm_segment_t old_fs = get_fs();
1518 	set_fs(KERNEL_DS);
1519 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1520 	set_fs(old_fs);
1521 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1522 }
1523 
1524 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1525 /*
1526  * Format of NT_FILE note:
1527  *
1528  * long count     -- how many files are mapped
1529  * long page_size -- units for file_ofs
1530  * array of [COUNT] elements of
1531  *   long start
1532  *   long end
1533  *   long file_ofs
1534  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1535  */
1536 static int fill_files_note(struct memelfnote *note)
1537 {
1538 	struct vm_area_struct *vma;
1539 	unsigned count, size, names_ofs, remaining, n;
1540 	user_long_t *data;
1541 	user_long_t *start_end_ofs;
1542 	char *name_base, *name_curpos;
1543 
1544 	/* *Estimated* file count and total data size needed */
1545 	count = current->mm->map_count;
1546 	size = count * 64;
1547 
1548 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1549  alloc:
1550 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1551 		return -EINVAL;
1552 	size = round_up(size, PAGE_SIZE);
1553 	data = vmalloc(size);
1554 	if (!data)
1555 		return -ENOMEM;
1556 
1557 	start_end_ofs = data + 2;
1558 	name_base = name_curpos = ((char *)data) + names_ofs;
1559 	remaining = size - names_ofs;
1560 	count = 0;
1561 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1562 		struct file *file;
1563 		const char *filename;
1564 
1565 		file = vma->vm_file;
1566 		if (!file)
1567 			continue;
1568 		filename = file_path(file, name_curpos, remaining);
1569 		if (IS_ERR(filename)) {
1570 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1571 				vfree(data);
1572 				size = size * 5 / 4;
1573 				goto alloc;
1574 			}
1575 			continue;
1576 		}
1577 
1578 		/* file_path() fills at the end, move name down */
1579 		/* n = strlen(filename) + 1: */
1580 		n = (name_curpos + remaining) - filename;
1581 		remaining = filename - name_curpos;
1582 		memmove(name_curpos, filename, n);
1583 		name_curpos += n;
1584 
1585 		*start_end_ofs++ = vma->vm_start;
1586 		*start_end_ofs++ = vma->vm_end;
1587 		*start_end_ofs++ = vma->vm_pgoff;
1588 		count++;
1589 	}
1590 
1591 	/* Now we know the exact count of files, so store it */
1592 	data[0] = count;
1593 	data[1] = PAGE_SIZE;
1594 	/*
1595 	 * count is usually less than current->mm->map_count,
1596 	 * so we need to move the filenames down.
1597 	 */
1598 	n = current->mm->map_count - count;
1599 	if (n != 0) {
1600 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1601 		memmove(name_base - shift_bytes, name_base,
1602 			name_curpos - name_base);
1603 		name_curpos -= shift_bytes;
1604 	}
1605 
1606 	size = name_curpos - (char *)data;
1607 	fill_note(note, "CORE", NT_FILE, size, data);
1608 	return 0;
1609 }
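
/*
 * Editor's note - a hypothetical NT_FILE payload for two mappings
 * (the paths are made up for illustration):
 *
 *	2, 4096					count, page_size
 *	0x400000, 0x401000, 0			start, end, file_ofs of file 1
 *	0x7f0000000000, 0x7f0000001000, 0	the same triple for file 2
 *	"/bin/true\0/lib/ld.so\0"		the two filenames
 *
 * Note that file_ofs is vm_pgoff, i.e. in page_size units, not bytes.
 */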
1610 
1611 #ifdef CORE_DUMP_USE_REGSET
1612 #include <linux/regset.h>
1613 
1614 struct elf_thread_core_info {
1615 	struct elf_thread_core_info *next;
1616 	struct task_struct *task;
1617 	struct elf_prstatus prstatus;
1618 	struct memelfnote notes[0];
1619 };
1620 
1621 struct elf_note_info {
1622 	struct elf_thread_core_info *thread;
1623 	struct memelfnote psinfo;
1624 	struct memelfnote signote;
1625 	struct memelfnote auxv;
1626 	struct memelfnote files;
1627 	user_siginfo_t csigdata;
1628 	size_t size;
1629 	int thread_notes;
1630 };
1631 
1632 /*
1633  * When a regset has a writeback hook, we call it on each thread before
1634  * dumping user memory.  On register window machines, this makes sure the
1635  * user memory backing the register data is up to date before we read it.
1636  */
1637 static void do_thread_regset_writeback(struct task_struct *task,
1638 				       const struct user_regset *regset)
1639 {
1640 	if (regset->writeback)
1641 		regset->writeback(task, regset, 1);
1642 }
1643 
1644 #ifndef PRSTATUS_SIZE
1645 #define PRSTATUS_SIZE(S, R) sizeof(S)
1646 #endif
1647 
1648 #ifndef SET_PR_FPVALID
1649 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1650 #endif
1651 
1652 static int fill_thread_core_info(struct elf_thread_core_info *t,
1653 				 const struct user_regset_view *view,
1654 				 long signr, size_t *total)
1655 {
1656 	unsigned int i;
1657 	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1658 
1659 	/*
1660 	 * NT_PRSTATUS is the one special case, because the regset data
1661 	 * goes into the pr_reg field inside the note contents, rather
1662 	 * than being the whole note contents.  We fill the rest in here.
1663 	 * We assume that regset 0 is NT_PRSTATUS.
1664 	 */
1665 	fill_prstatus(&t->prstatus, t->task, signr);
1666 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1667 				    &t->prstatus.pr_reg, NULL);
1668 
1669 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1670 		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1671 	*total += notesize(&t->notes[0]);
1672 
1673 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1674 
1675 	/*
1676 	 * Each other regset might generate a note too.  For each regset
1677 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1678 	 * all zero and we'll know to skip writing it later.
1679 	 */
1680 	for (i = 1; i < view->n; ++i) {
1681 		const struct user_regset *regset = &view->regsets[i];
1682 		do_thread_regset_writeback(t->task, regset);
1683 		if (regset->core_note_type && regset->get &&
1684 		    (!regset->active || regset->active(t->task, regset))) {
1685 			int ret;
1686 			size_t size = regset->n * regset->size;
1687 			void *data = kmalloc(size, GFP_KERNEL);
1688 			if (unlikely(!data))
1689 				return 0;
1690 			ret = regset->get(t->task, regset,
1691 					  0, size, data, NULL);
1692 			if (unlikely(ret))
1693 				kfree(data);
1694 			else {
1695 				if (regset->core_note_type != NT_PRFPREG)
1696 					fill_note(&t->notes[i], "LINUX",
1697 						  regset->core_note_type,
1698 						  size, data);
1699 				else {
1700 					SET_PR_FPVALID(&t->prstatus,
1701 							1, regset_size);
1702 					fill_note(&t->notes[i], "CORE",
1703 						  NT_PRFPREG, size, data);
1704 				}
1705 				*total += notesize(&t->notes[i]);
1706 			}
1707 		}
1708 	}
1709 
1710 	return 1;
1711 }
1712 
1713 static int fill_note_info(struct elfhdr *elf, int phdrs,
1714 			  struct elf_note_info *info,
1715 			  const siginfo_t *siginfo, struct pt_regs *regs)
1716 {
1717 	struct task_struct *dump_task = current;
1718 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1719 	struct elf_thread_core_info *t;
1720 	struct elf_prpsinfo *psinfo;
1721 	struct core_thread *ct;
1722 	unsigned int i;
1723 
1724 	info->size = 0;
1725 	info->thread = NULL;
1726 
1727 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1728 	if (psinfo == NULL) {
1729 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1730 		return 0;
1731 	}
1732 
1733 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1734 
1735 	/*
1736 	 * Figure out how many notes we're going to need for each thread.
1737 	 */
1738 	info->thread_notes = 0;
1739 	for (i = 0; i < view->n; ++i)
1740 		if (view->regsets[i].core_note_type != 0)
1741 			++info->thread_notes;
1742 
1743 	/*
1744 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1745 	 * since it is our one special case.
1746 	 */
1747 	if (unlikely(info->thread_notes == 0) ||
1748 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1749 		WARN_ON(1);
1750 		return 0;
1751 	}
1752 
1753 	/*
1754 	 * Initialize the ELF file header.
1755 	 */
1756 	fill_elf_header(elf, phdrs,
1757 			view->e_machine, view->e_flags);
1758 
1759 	/*
1760 	 * Allocate a structure for each thread.
1761 	 */
1762 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1763 		t = kzalloc(offsetof(struct elf_thread_core_info,
1764 				     notes[info->thread_notes]),
1765 			    GFP_KERNEL);
1766 		if (unlikely(!t))
1767 			return 0;
1768 
1769 		t->task = ct->task;
1770 		if (ct->task == dump_task || !info->thread) {
1771 			t->next = info->thread;
1772 			info->thread = t;
1773 		} else {
1774 			/*
1775 			 * Make sure to keep the original task at
1776 			 * the head of the list.
1777 			 */
1778 			t->next = info->thread->next;
1779 			info->thread->next = t;
1780 		}
1781 	}
1782 
1783 	/*
1784 	 * Now fill in each thread's information.
1785 	 */
1786 	for (t = info->thread; t != NULL; t = t->next)
1787 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1788 			return 0;
1789 
1790 	/*
1791 	 * Fill in the two process-wide notes.
1792 	 */
1793 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1794 	info->size += notesize(&info->psinfo);
1795 
1796 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1797 	info->size += notesize(&info->signote);
1798 
1799 	fill_auxv_note(&info->auxv, current->mm);
1800 	info->size += notesize(&info->auxv);
1801 
1802 	if (fill_files_note(&info->files) == 0)
1803 		info->size += notesize(&info->files);
1804 
1805 	return 1;
1806 }
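
/*
 * Editor's sketch (userspace mock; the mock_* names are invented here) of
 * the list discipline used in the allocation loop above: the dumping task
 * is kept at the head and every other thread is spliced in right behind
 * it, so the first NT_PRSTATUS in the core always belongs to the task
 * that triggered the dump.
 */
#include <stdio.h>

struct mock_thread { int tid; struct mock_thread *next; };

static void mock_link(struct mock_thread **head, struct mock_thread *t,
		      int is_dump_task)
{
	if (is_dump_task || !*head) {
		t->next = *head;
		*head = t;
	} else {
		t->next = (*head)->next;
		(*head)->next = t;
	}
}

int main(void)
{
	struct mock_thread a = { 101 }, b = { 102 }, dumper = { 100 };
	struct mock_thread *head = NULL, *t;

	mock_link(&head, &dumper, 1);
	mock_link(&head, &a, 0);
	mock_link(&head, &b, 0);
	for (t = head; t; t = t->next)
		printf("%d\n", t->tid);	/* 100 prints first, as required */
	return 0;
}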
1807 
1808 static size_t get_note_info_size(struct elf_note_info *info)
1809 {
1810 	return info->size;
1811 }
1812 
1813 /*
1814  * Write all the notes for each thread.  When writing the first thread, the
1815  * process-wide notes are interleaved after the first thread-specific note.
1816  */
1817 static int write_note_info(struct elf_note_info *info,
1818 			   struct coredump_params *cprm)
1819 {
1820 	bool first = true;
1821 	struct elf_thread_core_info *t = info->thread;
1822 
1823 	do {
1824 		int i;
1825 
1826 		if (!writenote(&t->notes[0], cprm))
1827 			return 0;
1828 
1829 		if (first && !writenote(&info->psinfo, cprm))
1830 			return 0;
1831 		if (first && !writenote(&info->signote, cprm))
1832 			return 0;
1833 		if (first && !writenote(&info->auxv, cprm))
1834 			return 0;
1835 		if (first && info->files.data &&
1836 				!writenote(&info->files, cprm))
1837 			return 0;
1838 
1839 		for (i = 1; i < info->thread_notes; ++i)
1840 			if (t->notes[i].data &&
1841 			    !writenote(&t->notes[i], cprm))
1842 				return 0;
1843 
1844 		first = false;
1845 		t = t->next;
1846 	} while (t);
1847 
1848 	return 1;
1849 }
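
/*
 * Editor's sketch (standalone userspace tool, not part of this file):
 * walk the PT_NOTE segment of a core file and print each note, which
 * makes the interleaving described above visible -- thread 1's
 * NT_PRSTATUS, then the process-wide notes, then thread 1's remaining
 * regset notes, then one group per additional thread.  64-bit,
 * native-endian cores with small offsets assumed; error handling is
 * minimal.
 */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	FILE *f;
	Elf64_Ehdr eh;
	Elf64_Phdr ph;
	unsigned char *buf;
	size_t off;
	int i;

	if (argc != 2 || !(f = fopen(argv[1], "rb")) ||
	    fread(&eh, sizeof(eh), 1, f) != 1)
		return 1;
	for (i = 0; i < eh.e_phnum; i++) {
		fseek(f, eh.e_phoff + i * sizeof(ph), SEEK_SET);
		if (fread(&ph, sizeof(ph), 1, f) != 1)
			return 1;
		if (ph.p_type == PT_NOTE)
			break;
	}
	if (i == eh.e_phnum)
		return 1;
	if (!(buf = malloc(ph.p_filesz)))
		return 1;
	fseek(f, ph.p_offset, SEEK_SET);
	if (fread(buf, 1, ph.p_filesz, f) != ph.p_filesz)
		return 1;
	for (off = 0; off + sizeof(Elf64_Nhdr) <= ph.p_filesz; ) {
		Elf64_Nhdr *nh = (Elf64_Nhdr *)(buf + off);

		/* the owner name follows the header, NUL-terminated */
		printf("%-8s type=%u descsz=%u\n", (char *)(nh + 1),
		       (unsigned)nh->n_type, (unsigned)nh->n_descsz);
		off += sizeof(*nh) + ((nh->n_namesz + 3) & ~3u)
				   + ((nh->n_descsz + 3) & ~3u);
	}
	return 0;
}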
1850 
1851 static void free_note_info(struct elf_note_info *info)
1852 {
1853 	struct elf_thread_core_info *threads = info->thread;
1854 	while (threads) {
1855 		unsigned int i;
1856 		struct elf_thread_core_info *t = threads;
1857 		threads = t->next;
1858 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1859 		for (i = 1; i < info->thread_notes; ++i)
1860 			kfree(t->notes[i].data);
1861 		kfree(t);
1862 	}
1863 	kfree(info->psinfo.data);
1864 	vfree(info->files.data);
1865 }
1866 
1867 #else
1868 
1869 /* Here is the structure in which status of each thread is captured. */
1870 struct elf_thread_status
1871 {
1872 	struct list_head list;
1873 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1874 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1875 	struct task_struct *thread;
1876 #ifdef ELF_CORE_COPY_XFPREGS
1877 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1878 #endif
1879 	struct memelfnote notes[3];
1880 	int num_notes;
1881 };
1882 
1883 /*
1884  * In order to add the specific thread information for the ELF file format,
1885  * we need to keep a linked list of every thread's pr_status and then create
1886  * a single section for them in the final core file.
1887  */
1888 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1889 {
1890 	int sz = 0;
1891 	struct task_struct *p = t->thread;
1892 	t->num_notes = 0;
1893 
1894 	fill_prstatus(&t->prstatus, p, signr);
1895 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1896 
1897 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1898 		  &(t->prstatus));
1899 	t->num_notes++;
1900 	sz += notesize(&t->notes[0]);
1901 
1902 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1903 								&t->fpu))) {
1904 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1905 			  &(t->fpu));
1906 		t->num_notes++;
1907 		sz += notesize(&t->notes[1]);
1908 	}
1909 
1910 #ifdef ELF_CORE_COPY_XFPREGS
1911 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1912 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1913 			  sizeof(t->xfpu), &t->xfpu);
1914 		t->num_notes++;
1915 		sz += notesize(&t->notes[2]);
1916 	}
1917 #endif
1918 	return sz;
1919 }
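
/*
 * Editor's sketch (userspace, hedged): glibc's <sys/procfs.h> exposes the
 * same struct elf_prstatus that is written above, so the descriptor bytes
 * of an NT_PRSTATUS note can be read back directly -- e.g. to recover the
 * thread id.  Assumes the core and the reader share an ABI.
 */
#include <sys/procfs.h>

static pid_t prstatus_pid(const void *desc)
{
	const struct elf_prstatus *prs = desc;

	return prs->pr_pid;	/* filled in by fill_prstatus() above */
}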
1920 
1921 struct elf_note_info {
1922 	struct memelfnote *notes;
1923 	struct memelfnote *notes_files;
1924 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1925 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1926 	struct list_head thread_list;
1927 	elf_fpregset_t *fpu;
1928 #ifdef ELF_CORE_COPY_XFPREGS
1929 	elf_fpxregset_t *xfpu;
1930 #endif
1931 	user_siginfo_t csigdata;
1932 	int thread_status_size;
1933 	int numnote;
1934 };
1935 
1936 static int elf_note_info_init(struct elf_note_info *info)
1937 {
1938 	memset(info, 0, sizeof(*info));
1939 	INIT_LIST_HEAD(&info->thread_list);
1940 
1941 	/* Allocate space for ELF notes */
1942 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1943 	if (!info->notes)
1944 		return 0;
1945 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1946 	if (!info->psinfo)
1947 		return 0;
1948 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1949 	if (!info->prstatus)
1950 		return 0;
1951 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1952 	if (!info->fpu)
1953 		return 0;
1954 #ifdef ELF_CORE_COPY_XFPREGS
1955 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1956 	if (!info->xfpu)
1957 		return 0;
1958 #endif
1959 	return 1;
1960 }
1961 
1962 static int fill_note_info(struct elfhdr *elf, int phdrs,
1963 			  struct elf_note_info *info,
1964 			  const siginfo_t *siginfo, struct pt_regs *regs)
1965 {
1966 	struct list_head *t;
1967 	struct core_thread *ct;
1968 	struct elf_thread_status *ets;
1969 
1970 	if (!elf_note_info_init(info))
1971 		return 0;
1972 
1973 	for (ct = current->mm->core_state->dumper.next;
1974 					ct; ct = ct->next) {
1975 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1976 		if (!ets)
1977 			return 0;
1978 
1979 		ets->thread = ct->task;
1980 		list_add(&ets->list, &info->thread_list);
1981 	}
1982 
1983 	list_for_each(t, &info->thread_list) {
1984 		int sz;
1985 
1986 		ets = list_entry(t, struct elf_thread_status, list);
1987 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1988 		info->thread_status_size += sz;
1989 	}
1990 	/* now collect the dump for the current task */
1991 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1992 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1993 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1994 
1995 	/* Set up header */
1996 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1997 
1998 	/*
1999 	 * Set up the notes in similar form to SVR4 core dumps made
2000 	 * with info from their /proc.
2001 	 */
2002 
2003 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2004 		  sizeof(*info->prstatus), info->prstatus);
2005 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2006 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2007 		  sizeof(*info->psinfo), info->psinfo);
2008 
2009 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2010 	fill_auxv_note(info->notes + 3, current->mm);
2011 	info->numnote = 4;
2012 
2013 	if (fill_files_note(info->notes + info->numnote) == 0) {
2014 		info->notes_files = info->notes + info->numnote;
2015 		info->numnote++;
2016 	}
2017 
2018 	/* Try to dump the FPU. */
2019 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2020 							       info->fpu);
2021 	if (info->prstatus->pr_fpvalid)
2022 		fill_note(info->notes + info->numnote++,
2023 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2024 #ifdef ELF_CORE_COPY_XFPREGS
2025 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2026 		fill_note(info->notes + info->numnote++,
2027 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2028 			  sizeof(*info->xfpu), info->xfpu);
2029 #endif
2030 
2031 	return 1;
2032 }
2033 
2034 static size_t get_note_info_size(struct elf_note_info *info)
2035 {
2036 	int sz = 0;
2037 	int i;
2038 
2039 	for (i = 0; i < info->numnote; i++)
2040 		sz += notesize(info->notes + i);
2041 
2042 	sz += info->thread_status_size;
2043 
2044 	return sz;
2045 }
2046 
2047 static int write_note_info(struct elf_note_info *info,
2048 			   struct coredump_params *cprm)
2049 {
2050 	int i;
2051 	struct list_head *t;
2052 
2053 	for (i = 0; i < info->numnote; i++)
2054 		if (!writenote(info->notes + i, cprm))
2055 			return 0;
2056 
2057 	/* write out the thread status notes section */
2058 	list_for_each(t, &info->thread_list) {
2059 		struct elf_thread_status *tmp =
2060 				list_entry(t, struct elf_thread_status, list);
2061 
2062 		for (i = 0; i < tmp->num_notes; i++)
2063 			if (!writenote(&tmp->notes[i], cprm))
2064 				return 0;
2065 	}
2066 
2067 	return 1;
2068 }
2069 
2070 static void free_note_info(struct elf_note_info *info)
2071 {
2072 	while (!list_empty(&info->thread_list)) {
2073 		struct list_head *tmp = info->thread_list.next;
2074 		list_del(tmp);
2075 		kfree(list_entry(tmp, struct elf_thread_status, list));
2076 	}
2077 
2078 	/* Free data possibly allocated by fill_files_note(): */
2079 	if (info->notes_files)
2080 		vfree(info->notes_files->data);
2081 
2082 	kfree(info->prstatus);
2083 	kfree(info->psinfo);
2084 	kfree(info->notes);
2085 	kfree(info->fpu);
2086 #ifdef ELF_CORE_COPY_XFPREGS
2087 	kfree(info->xfpu);
2088 #endif
2089 }
2090 
2091 #endif
2092 
2093 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2094 					struct vm_area_struct *gate_vma)
2095 {
2096 	struct vm_area_struct *ret = tsk->mm->mmap;
2097 
2098 	if (ret)
2099 		return ret;
2100 	return gate_vma;
2101 }
2102 /*
2103  * Helper function for iterating across a vma list.  It ensures that the caller
2104  * will visit `gate_vma' prior to terminating the search.
2105  */
2106 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2107 					struct vm_area_struct *gate_vma)
2108 {
2109 	struct vm_area_struct *ret;
2110 
2111 	ret = this_vma->vm_next;
2112 	if (ret)
2113 		return ret;
2114 	if (this_vma == gate_vma)
2115 		return NULL;
2116 	return gate_vma;
2117 }
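
/*
 * Editor's sketch (userspace mock, not kernel code) of the traversal that
 * first_vma()/next_vma() implement: walk the ->vm_next chain and, if a
 * gate vma exists outside that chain, visit it exactly once at the end.
 */
#include <stdio.h>

struct mock_vma { const char *name; struct mock_vma *vm_next; };

static struct mock_vma *mock_next_vma(struct mock_vma *this_vma,
				      struct mock_vma *gate_vma)
{
	if (this_vma->vm_next)
		return this_vma->vm_next;
	return this_vma == gate_vma ? NULL : gate_vma;
}

int main(void)
{
	struct mock_vma gate = { "gate", NULL };
	struct mock_vma b = { "b", NULL };
	struct mock_vma a = { "a", &b };
	struct mock_vma *vma;

	for (vma = &a; vma; vma = mock_next_vma(vma, &gate))
		puts(vma->name);	/* prints a, b, gate */
	return 0;
}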
2118 
2119 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2120 			     elf_addr_t e_shoff, int segs)
2121 {
2122 	elf->e_shoff = e_shoff;
2123 	elf->e_shentsize = sizeof(*shdr4extnum);
2124 	elf->e_shnum = 1;
2125 	elf->e_shstrndx = SHN_UNDEF;
2126 
2127 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2128 
2129 	shdr4extnum->sh_type = SHT_NULL;
2130 	shdr4extnum->sh_size = elf->e_shnum;
2131 	shdr4extnum->sh_link = elf->e_shstrndx;
2132 	shdr4extnum->sh_info = segs;
2133 }
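
/*
 * Editor's sketch of the consumer side of fill_extnum_info() (userspace,
 * hedged): when a dump has PN_XNUM (0xffff) or more segments, e_phnum
 * reads as PN_XNUM and the real count is recovered from section header
 * 0's sh_info -- exactly the field the function above fills with `segs'.
 * 64-bit, native-endian cores assumed.
 */
#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	FILE *f;
	Elf64_Ehdr eh;
	Elf64_Shdr sh0;
	unsigned long phnum;

	if (argc != 2 || !(f = fopen(argv[1], "rb")) ||
	    fread(&eh, sizeof(eh), 1, f) != 1)
		return 1;
	phnum = eh.e_phnum;
	if (eh.e_phnum == PN_XNUM) {
		fseek(f, eh.e_shoff, SEEK_SET);
		if (fread(&sh0, sizeof(sh0), 1, f) != 1)
			return 1;
		phnum = sh0.sh_info;	/* the `segs' value stored above */
	}
	printf("program headers: %lu\n", phnum);
	return 0;
}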
2134 
2135 /*
2136  * Actual dumper
2137  *
2138  * This is a two-pass process; first we find the offsets of the bits,
2139  * and then they are actually written out.  If we exceed the core file
2140  * size limit, we just truncate.
2141  */
2142 static int elf_core_dump(struct coredump_params *cprm)
2143 {
2144 	int has_dumped = 0;
2145 	mm_segment_t fs;
2146 	int segs, i;
2147 	size_t vma_data_size = 0;
2148 	struct vm_area_struct *vma, *gate_vma;
2149 	struct elfhdr *elf = NULL;
2150 	loff_t offset = 0, dataoff;
2151 	struct elf_note_info info = { };
2152 	struct elf_phdr *phdr4note = NULL;
2153 	struct elf_shdr *shdr4extnum = NULL;
2154 	Elf_Half e_phnum;
2155 	elf_addr_t e_shoff;
2156 	elf_addr_t *vma_filesz = NULL;
2157 
2158 	/*
2159 	 * We no longer stop all VM operations.
2160 	 *
2161 	 * This is because those processes that could possibly change map_count
2162 	 * or the mmap / vma pages are now blocked in do_exit on current
2163 	 * finishing this core dump.
2164 	 *
2165 	 * Only ptrace can touch these memory addresses, but it doesn't change
2166 	 * the map_count or the pages allocated. So no possibility of crashing
2167 	 * exists while dumping the mm->vm_next areas to the core file.
2168 	 */
2169 
2170 	/* alloc memory for large data structures: too large to be on stack */
2171 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2172 	if (!elf)
2173 		goto out;
2174 	/*
2175 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2176 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2177 	 */
2178 	segs = current->mm->map_count;
2179 	segs += elf_core_extra_phdrs();
2180 
2181 	gate_vma = get_gate_vma(current->mm);
2182 	if (gate_vma != NULL)
2183 		segs++;
2184 
2185 	/* for notes section */
2186 	segs++;
2187 
2188 	/* If segs > PN_XNUM (0xffff), then e_phnum overflows.  To avoid
2189 	 * this, the kernel supports extended numbering.  Have a look at
2190 	 * include/linux/elf.h for further information. */
2191 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2192 
2193 	/*
2194 	 * Collect all the non-memory information about the process for the
2195 	 * notes.  This also sets up the file header.
2196 	 */
2197 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2198 		goto cleanup;
2199 
2200 	has_dumped = 1;
2201 
2202 	fs = get_fs();
2203 	set_fs(KERNEL_DS);
2204 
2205 	offset += sizeof(*elf);				/* Elf header */
2206 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2207 
2208 	/* Write notes phdr entry */
2209 	{
2210 		size_t sz = get_note_info_size(&info);
2211 
2212 		sz += elf_coredump_extra_notes_size();
2213 
2214 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2215 		if (!phdr4note)
2216 			goto end_coredump;
2217 
2218 		fill_elf_note_phdr(phdr4note, sz, offset);
2219 		offset += sz;
2220 	}
2221 
2222 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2223 
2224 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2225 		goto end_coredump;
2226 	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2227 	if (!vma_filesz)
2228 		goto end_coredump;
2229 
2230 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2231 			vma = next_vma(vma, gate_vma)) {
2232 		unsigned long dump_size;
2233 
2234 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2235 		vma_filesz[i++] = dump_size;
2236 		vma_data_size += dump_size;
2237 	}
2238 
2239 	offset += vma_data_size;
2240 	offset += elf_core_extra_data_size();
2241 	e_shoff = offset;
2242 
2243 	if (e_phnum == PN_XNUM) {
2244 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2245 		if (!shdr4extnum)
2246 			goto end_coredump;
2247 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2248 	}
2249 
2250 	offset = dataoff;
2251 
2252 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2253 		goto end_coredump;
2254 
2255 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2256 		goto end_coredump;
2257 
2258 	/* Write program headers for segments dump */
2259 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2260 			vma = next_vma(vma, gate_vma)) {
2261 		struct elf_phdr phdr;
2262 
2263 		phdr.p_type = PT_LOAD;
2264 		phdr.p_offset = offset;
2265 		phdr.p_vaddr = vma->vm_start;
2266 		phdr.p_paddr = 0;
2267 		phdr.p_filesz = vma_filesz[i++];
2268 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2269 		offset += phdr.p_filesz;
2270 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2271 		if (vma->vm_flags & VM_WRITE)
2272 			phdr.p_flags |= PF_W;
2273 		if (vma->vm_flags & VM_EXEC)
2274 			phdr.p_flags |= PF_X;
2275 		phdr.p_align = ELF_EXEC_PAGESIZE;
2276 
2277 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2278 			goto end_coredump;
2279 	}
2280 
2281 	if (!elf_core_write_extra_phdrs(cprm, offset))
2282 		goto end_coredump;
2283 
2284 	/* write out the notes section */
2285 	if (!write_note_info(&info, cprm))
2286 		goto end_coredump;
2287 
2288 	if (elf_coredump_extra_notes_write(cprm))
2289 		goto end_coredump;
2290 
2291 	/* Align to page */
2292 	if (!dump_skip(cprm, dataoff - cprm->pos))
2293 		goto end_coredump;
2294 
2295 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2296 			vma = next_vma(vma, gate_vma)) {
2297 		unsigned long addr;
2298 		unsigned long end;
2299 
2300 		end = vma->vm_start + vma_filesz[i++];
2301 
2302 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2303 			struct page *page;
2304 			int stop;
2305 
2306 			page = get_dump_page(addr);
2307 			if (page) {
2308 				void *kaddr = kmap(page);
2309 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2310 				kunmap(page);
2311 				put_page(page);
2312 			} else
2313 				stop = !dump_skip(cprm, PAGE_SIZE);
2314 			if (stop)
2315 				goto end_coredump;
2316 		}
2317 	}
2318 	dump_truncate(cprm);
2319 
2320 	if (!elf_core_write_extra_data(cprm))
2321 		goto end_coredump;
2322 
2323 	if (e_phnum == PN_XNUM) {
2324 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2325 			goto end_coredump;
2326 	}
2327 
2328 end_coredump:
2329 	set_fs(fs);
2330 
2331 cleanup:
2332 	free_note_info(&info);
2333 	kfree(shdr4extnum);
2334 	vfree(vma_filesz);
2335 	kfree(phdr4note);
2336 	kfree(elf);
2337 out:
2338 	return has_dumped;
2339 }
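
/*
 * Editor's sketch (standalone userspace, hedged): list the PT_LOAD
 * headers the dumper emitted above.  p_offset values start at the
 * page-aligned dataoff; a vma the dump policy skipped shows up with
 * p_filesz == 0 while p_memsz still covers the whole mapping, and a
 * truncated dump simply ends early.  64-bit native-endian cores assumed.
 */
#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	FILE *f;
	Elf64_Ehdr eh;
	Elf64_Phdr ph;
	int i;

	if (argc != 2 || !(f = fopen(argv[1], "rb")) ||
	    fread(&eh, sizeof(eh), 1, f) != 1)
		return 1;
	for (i = 0; i < eh.e_phnum; i++) {
		fseek(f, eh.e_phoff + (long)i * sizeof(ph), SEEK_SET);
		if (fread(&ph, sizeof(ph), 1, f) != 1)
			return 1;
		if (ph.p_type == PT_LOAD)
			printf("vaddr=%#llx off=%#llx filesz=%#llx memsz=%#llx\n",
			       (unsigned long long)ph.p_vaddr,
			       (unsigned long long)ph.p_offset,
			       (unsigned long long)ph.p_filesz,
			       (unsigned long long)ph.p_memsz);
	}
	return 0;
}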
2340 
2341 #endif		/* CONFIG_ELF_CORE */
2342 
2343 static int __init init_elf_binfmt(void)
2344 {
2345 	register_binfmt(&elf_format);
2346 	return 0;
2347 }
2348 
2349 static void __exit exit_elf_binfmt(void)
2350 {
2351 	/* Remove the ELF loader. */
2352 	unregister_binfmt(&elf_format);
2353 }
2354 
2355 core_initcall(init_elf_binfmt);
2356 module_exit(exit_elf_binfmt);
2357 MODULE_LICENSE("GPL");
2358