1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <linux/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42 
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49 
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 				int, int, unsigned long);
53 
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59 
60 /*
61  * If we don't support core dumping, then supply a NULL so we
62  * don't even try.
63  */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump	NULL
68 #endif
69 
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN	PAGE_SIZE
74 #endif
75 
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS	0
78 #endif
79 
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
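/*
 * Illustrative example, assuming ELF_MIN_ALIGN == 0x1000: for a virtual
 * address of 0x12345,
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within that page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next boundary)
 */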
83 
84 static struct linux_binfmt elf_format = {
85 	.module		= THIS_MODULE,
86 	.load_binary	= load_elf_binary,
87 	.load_shlib	= load_elf_library,
88 	.core_dump	= elf_core_dump,
89 	.min_coredump	= ELF_EXEC_PAGESIZE,
90 };
91 
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93 
94 static int set_brk(unsigned long start, unsigned long end, int prot)
95 {
96 	start = ELF_PAGEALIGN(start);
97 	end = ELF_PAGEALIGN(end);
98 	if (end > start) {
99 		/*
100 		 * Map the last of the bss segment.
101 		 * If the header is requesting these pages to be
102 		 * executable, honour that (ppc32 needs this).
103 		 */
104 		int error = vm_brk_flags(start, end - start,
105 				prot & PROT_EXEC ? VM_EXEC : 0);
106 		if (error)
107 			return error;
108 	}
109 	current->mm->start_brk = current->mm->brk = end;
110 	return 0;
111 }
112 
113 /* We need to explicitly zero any fractional pages
114    after the data section (i.e. bss).  These would
115    otherwise contain junk from the file that should
116    not be in memory.
117  */
118 static int padzero(unsigned long elf_bss)
119 {
120 	unsigned long nbyte;
121 
122 	nbyte = ELF_PAGEOFFSET(elf_bss);
123 	if (nbyte) {
124 		nbyte = ELF_MIN_ALIGN - nbyte;
125 		if (clear_user((void __user *) elf_bss, nbyte))
126 			return -EFAULT;
127 	}
128 	return 0;
129 }
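/*
 * Illustrative example, assuming ELF_MIN_ALIGN == 0x1000: if elf_bss ends
 * at 0x12345, ELF_PAGEOFFSET() gives 0x345, so padzero() clears the
 * remaining 0x1000 - 0x345 = 0xcbb bytes, i.e. everything up to 0x13000.
 */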
130 
131 /* Let's use some macros to make this stack manipulation a little clearer */
132 #ifdef CONFIG_STACK_GROWSUP
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
134 #define STACK_ROUND(sp, items) \
135 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ \
137 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
138 	old_sp; })
139 #else
140 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
141 #define STACK_ROUND(sp, items) \
142 	(((unsigned long) (sp - items)) &~ 15UL)
143 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
144 #endif
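/*
 * Sketch of the common grows-down case: STACK_ALLOC(sp, len) moves sp down
 * by len bytes and evaluates to the new sp, STACK_ADD(sp, items) reserves
 * room for that many elf_addr_t slots below sp, and STACK_ROUND() drops
 * the result to a 16-byte boundary so the initial user stack stays aligned.
 */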
145 
146 #ifndef ELF_BASE_PLATFORM
147 /*
148  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
149  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
150  * will be copied to the user stack in the same manner as AT_PLATFORM.
151  */
152 #define ELF_BASE_PLATFORM NULL
153 #endif
154 
155 static int
156 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
157 		unsigned long load_addr, unsigned long interp_load_addr)
158 {
159 	unsigned long p = bprm->p;
160 	int argc = bprm->argc;
161 	int envc = bprm->envc;
162 	elf_addr_t __user *argv;
163 	elf_addr_t __user *envp;
164 	elf_addr_t __user *sp;
165 	elf_addr_t __user *u_platform;
166 	elf_addr_t __user *u_base_platform;
167 	elf_addr_t __user *u_rand_bytes;
168 	const char *k_platform = ELF_PLATFORM;
169 	const char *k_base_platform = ELF_BASE_PLATFORM;
170 	unsigned char k_rand_bytes[16];
171 	int items;
172 	elf_addr_t *elf_info;
173 	int ei_index = 0;
174 	const struct cred *cred = current_cred();
175 	struct vm_area_struct *vma;
176 
177 	/*
178 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
179 	 * evictions by the processes running on the same package. One
180 	 * thing we can do is to shuffle the initial stack for them.
181 	 */
182 
183 	p = arch_align_stack(p);
184 
185 	/*
186 	 * If this architecture has a platform capability string, copy it
187 	 * to userspace.  In some cases (Sparc), this info is impossible
188 	 * for userspace to get any other way, in others (i386) it is
189 	 * merely difficult.
190 	 */
191 	u_platform = NULL;
192 	if (k_platform) {
193 		size_t len = strlen(k_platform) + 1;
194 
195 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
196 		if (__copy_to_user(u_platform, k_platform, len))
197 			return -EFAULT;
198 	}
199 
200 	/*
201 	 * If this architecture has a "base" platform capability
202 	 * string, copy it to userspace.
203 	 */
204 	u_base_platform = NULL;
205 	if (k_base_platform) {
206 		size_t len = strlen(k_base_platform) + 1;
207 
208 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
209 		if (__copy_to_user(u_base_platform, k_base_platform, len))
210 			return -EFAULT;
211 	}
212 
213 	/*
214 	 * Generate 16 random bytes for userspace PRNG seeding.
215 	 */
216 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
217 	u_rand_bytes = (elf_addr_t __user *)
218 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
219 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
220 		return -EFAULT;
221 
222 	/* Create the ELF interpreter info */
223 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
224 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
225 #define NEW_AUX_ENT(id, val) \
226 	do { \
227 		elf_info[ei_index++] = id; \
228 		elf_info[ei_index++] = val; \
229 	} while (0)
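/*
 * Illustrative note: each NEW_AUX_ENT() stores an (id, value) pair, e.g.
 * AT_PAGESZ followed by ELF_EXEC_PAGESIZE, so saved_auxv becomes a flat
 * array of alternating keys and values, later terminated by an AT_NULL pair.
 */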
230 
231 #ifdef ARCH_DLINFO
232 	/*
233 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
234 	 * AUXV.
235 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
236 	 * ARCH_DLINFO changes
237 	 */
238 	ARCH_DLINFO;
239 #endif
240 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
241 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
242 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
243 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
244 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
245 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
246 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
247 	NEW_AUX_ENT(AT_FLAGS, 0);
248 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
249 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
250 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
251 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
252 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
253 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
254 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
255 #ifdef ELF_HWCAP2
256 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
257 #endif
258 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
259 	if (k_platform) {
260 		NEW_AUX_ENT(AT_PLATFORM,
261 			    (elf_addr_t)(unsigned long)u_platform);
262 	}
263 	if (k_base_platform) {
264 		NEW_AUX_ENT(AT_BASE_PLATFORM,
265 			    (elf_addr_t)(unsigned long)u_base_platform);
266 	}
267 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
268 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
269 	}
270 #undef NEW_AUX_ENT
271 	/* AT_NULL is zero; clear the rest too */
272 	memset(&elf_info[ei_index], 0,
273 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
274 
275 	/* And advance past the AT_NULL entry.  */
276 	ei_index += 2;
277 
278 	sp = STACK_ADD(p, ei_index);
279 
280 	items = (argc + 1) + (envc + 1) + 1;
281 	bprm->p = STACK_ROUND(sp, items);
282 
283 	/* Point sp at the lowest address on the stack */
284 #ifdef CONFIG_STACK_GROWSUP
285 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
286 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
287 #else
288 	sp = (elf_addr_t __user *)bprm->p;
289 #endif
290 
291 
292 	/*
293 	 * Grow the stack manually; some architectures have a limit on how
294 	 * far ahead a user-space access may be in order to grow the stack.
295 	 */
296 	vma = find_extend_vma(current->mm, bprm->p);
297 	if (!vma)
298 		return -EFAULT;
299 
300 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
301 	if (__put_user(argc, sp++))
302 		return -EFAULT;
303 	argv = sp;
304 	envp = argv + argc + 1;
305 
306 	/* Populate argv and envp */
307 	p = current->mm->arg_end = current->mm->arg_start;
308 	while (argc-- > 0) {
309 		size_t len;
310 		if (__put_user((elf_addr_t)p, argv++))
311 			return -EFAULT;
312 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 		if (!len || len > MAX_ARG_STRLEN)
314 			return -EINVAL;
315 		p += len;
316 	}
317 	if (__put_user(0, argv))
318 		return -EFAULT;
319 	current->mm->arg_end = current->mm->env_start = p;
320 	while (envc-- > 0) {
321 		size_t len;
322 		if (__put_user((elf_addr_t)p, envp++))
323 			return -EFAULT;
324 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
325 		if (!len || len > MAX_ARG_STRLEN)
326 			return -EINVAL;
327 		p += len;
328 	}
329 	if (__put_user(0, envp))
330 		return -EFAULT;
331 	current->mm->env_end = p;
332 
333 	/* Put the elf_info on the stack in the right place.  */
334 	sp = (elf_addr_t __user *)envp + 1;
335 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
336 		return -EFAULT;
337 	return 0;
338 }
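/*
 * Rough summary of the initial stack this builds, from low to high
 * addresses on a grows-down stack: argc, argv[0..argc-1], NULL,
 * envp[0..envc-1], NULL, the auxv (id, value) pairs ending with AT_NULL,
 * and above those the strings and random bytes pushed earlier.
 */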
339 
340 #ifndef elf_map
341 
342 static unsigned long elf_map(struct file *filep, unsigned long addr,
343 		struct elf_phdr *eppnt, int prot, int type,
344 		unsigned long total_size)
345 {
346 	unsigned long map_addr;
347 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
348 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
349 	addr = ELF_PAGESTART(addr);
350 	size = ELF_PAGEALIGN(size);
351 
352 	/* mmap() will return -EINVAL if given a zero size, but a
353 	 * segment with zero filesize is perfectly valid */
354 	if (!size)
355 		return addr;
356 
357 	/*
358 	 * total_size is the size of the ELF (interpreter) image.
359 	 * The _first_ mmap needs to know the full size, otherwise
360 	 * randomization might put this image into an overlapping
361 	 * position with the ELF binary image (since size < total_size).
362 	 * So we first map the 'big' image and then unmap the remainder at
363 	 * the end (the unmap is needed for ELF images with holes).
364 	 */
365 	if (total_size) {
366 		total_size = ELF_PAGEALIGN(total_size);
367 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
368 		if (!BAD_ADDR(map_addr))
369 			vm_munmap(map_addr+size, total_size-size);
370 	} else
371 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
372 
373 	return(map_addr);
374 }
375 
376 #endif /* !elf_map */
377 
378 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
379 {
380 	int i, first_idx = -1, last_idx = -1;
381 
382 	for (i = 0; i < nr; i++) {
383 		if (cmds[i].p_type == PT_LOAD) {
384 			last_idx = i;
385 			if (first_idx == -1)
386 				first_idx = i;
387 		}
388 	}
389 	if (first_idx == -1)
390 		return 0;
391 
392 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
393 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
394 }
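/*
 * Illustrative example: with PT_LOAD segments at p_vaddr 0x400000
 * (p_memsz 0x1000) and p_vaddr 0x600e10 (p_memsz 0x430), the returned
 * span is 0x600e10 + 0x430 - ELF_PAGESTART(0x400000) = 0x201240 bytes.
 */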
395 
396 /**
397  * load_elf_phdrs() - load ELF program headers
398  * @elf_ex:   ELF header of the binary whose program headers should be loaded
399  * @elf_file: the opened ELF binary file
400  *
401  * Loads ELF program headers from the binary file elf_file, which has the ELF
402  * header pointed to by elf_ex, into a newly allocated array. The caller is
403  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
404  */
405 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
406 				       struct file *elf_file)
407 {
408 	struct elf_phdr *elf_phdata = NULL;
409 	int retval, size, err = -1;
410 
411 	/*
412 	 * If the size of this structure has changed, then punt, since
413 	 * we will be doing the wrong thing.
414 	 */
415 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
416 		goto out;
417 
418 	/* Sanity check the number of program headers... */
419 	if (elf_ex->e_phnum < 1 ||
420 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
421 		goto out;
422 
423 	/* ...and their total size. */
424 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
425 	if (size > ELF_MIN_ALIGN)
426 		goto out;
427 
428 	elf_phdata = kmalloc(size, GFP_KERNEL);
429 	if (!elf_phdata)
430 		goto out;
431 
432 	/* Read in the program headers */
433 	retval = kernel_read(elf_file, elf_ex->e_phoff,
434 			     (char *)elf_phdata, size);
435 	if (retval != size) {
436 		err = (retval < 0) ? retval : -EIO;
437 		goto out;
438 	}
439 
440 	/* Success! */
441 	err = 0;
442 out:
443 	if (err) {
444 		kfree(elf_phdata);
445 		elf_phdata = NULL;
446 	}
447 	return elf_phdata;
448 }
449 
450 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
451 
452 /**
453  * struct arch_elf_state - arch-specific ELF loading state
454  *
455  * This structure is used to preserve architecture specific data during
456  * the loading of an ELF file, throughout the checking of architecture
457  * specific ELF headers & through to the point where the ELF load is
458  * known to be proceeding (ie. SET_PERSONALITY).
459  *
460  * This implementation is a dummy for architectures which require no
461  * specific state.
462  */
463 struct arch_elf_state {
464 };
465 
466 #define INIT_ARCH_ELF_STATE {}
467 
468 /**
469  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
470  * @ehdr:	The main ELF header
471  * @phdr:	The program header to check
472  * @elf:	The open ELF file
473  * @is_interp:	True if the phdr is from the interpreter of the ELF being
474  *		loaded, else false.
475  * @state:	Architecture-specific state preserved throughout the process
476  *		of loading the ELF.
477  *
478  * Inspects the program header phdr to validate its correctness and/or
479  * suitability for the system. Called once per ELF program header in the
480  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
481  * interpreter.
482  *
483  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
484  *         with that return code.
485  */
486 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
487 				   struct elf_phdr *phdr,
488 				   struct file *elf, bool is_interp,
489 				   struct arch_elf_state *state)
490 {
491 	/* Dummy implementation, always proceed */
492 	return 0;
493 }
494 
495 /**
496  * arch_check_elf() - check an ELF executable
497  * @ehdr:	The main ELF header
498  * @has_interp:	True if the ELF has an interpreter, else false.
499  * @interp_ehdr: The interpreter's ELF header
500  * @state:	Architecture-specific state preserved throughout the process
501  *		of loading the ELF.
502  *
503  * Provides a final opportunity for architecture code to reject the loading
504  * of the ELF & cause an exec syscall to return an error. This is called after
505  * all program headers to be checked by arch_elf_pt_proc have been.
506  *
507  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
508  *         with that return code.
509  */
510 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
511 				 struct elfhdr *interp_ehdr,
512 				 struct arch_elf_state *state)
513 {
514 	/* Dummy implementation, always proceed */
515 	return 0;
516 }
517 
518 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
519 
520 /* This is much more generalized than the library routine read function,
521    so we keep this separate.  Technically the library read function
522    is only provided so that we can read a.out libraries that have
523    an ELF header */
524 
525 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
526 		struct file *interpreter, unsigned long *interp_map_addr,
527 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
528 {
529 	struct elf_phdr *eppnt;
530 	unsigned long load_addr = 0;
531 	int load_addr_set = 0;
532 	unsigned long last_bss = 0, elf_bss = 0;
533 	int bss_prot = 0;
534 	unsigned long error = ~0UL;
535 	unsigned long total_size;
536 	int i;
537 
538 	/* First of all, some simple consistency checks */
539 	if (interp_elf_ex->e_type != ET_EXEC &&
540 	    interp_elf_ex->e_type != ET_DYN)
541 		goto out;
542 	if (!elf_check_arch(interp_elf_ex))
543 		goto out;
544 	if (!interpreter->f_op->mmap)
545 		goto out;
546 
547 	total_size = total_mapping_size(interp_elf_phdata,
548 					interp_elf_ex->e_phnum);
549 	if (!total_size) {
550 		error = -EINVAL;
551 		goto out;
552 	}
553 
554 	eppnt = interp_elf_phdata;
555 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
556 		if (eppnt->p_type == PT_LOAD) {
557 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
558 			int elf_prot = 0;
559 			unsigned long vaddr = 0;
560 			unsigned long k, map_addr;
561 
562 			if (eppnt->p_flags & PF_R)
563 				elf_prot = PROT_READ;
564 			if (eppnt->p_flags & PF_W)
565 				elf_prot |= PROT_WRITE;
566 			if (eppnt->p_flags & PF_X)
567 				elf_prot |= PROT_EXEC;
568 			vaddr = eppnt->p_vaddr;
569 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
570 				elf_type |= MAP_FIXED;
571 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
572 				load_addr = -vaddr;
573 
574 			map_addr = elf_map(interpreter, load_addr + vaddr,
575 					eppnt, elf_prot, elf_type, total_size);
576 			total_size = 0;
577 			if (!*interp_map_addr)
578 				*interp_map_addr = map_addr;
579 			error = map_addr;
580 			if (BAD_ADDR(map_addr))
581 				goto out;
582 
583 			if (!load_addr_set &&
584 			    interp_elf_ex->e_type == ET_DYN) {
585 				load_addr = map_addr - ELF_PAGESTART(vaddr);
586 				load_addr_set = 1;
587 			}
588 
589 			/*
590 			 * Check to see if the section's size will overflow the
591 			 * allowed task size. Note that p_filesz must always be
592 			 * <= p_memsz so it's only necessary to check p_memsz.
593 			 */
594 			k = load_addr + eppnt->p_vaddr;
595 			if (BAD_ADDR(k) ||
596 			    eppnt->p_filesz > eppnt->p_memsz ||
597 			    eppnt->p_memsz > TASK_SIZE ||
598 			    TASK_SIZE - eppnt->p_memsz < k) {
599 				error = -ENOMEM;
600 				goto out;
601 			}
602 
603 			/*
604 			 * Find the end of the file mapping for this phdr, and
605 			 * keep track of the largest address we see for this.
606 			 */
607 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
608 			if (k > elf_bss)
609 				elf_bss = k;
610 
611 			/*
612 			 * Do the same thing for the memory mapping - between
613 			 * elf_bss and last_bss is the bss section.
614 			 */
615 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
616 			if (k > last_bss) {
617 				last_bss = k;
618 				bss_prot = elf_prot;
619 			}
620 		}
621 	}
622 
623 	/*
624 	 * Now fill out the bss section: first pad the last page from
625 	 * the file up to the page boundary, and zero it from elf_bss
626 	 * up to the end of the page.
627 	 */
628 	if (padzero(elf_bss)) {
629 		error = -EFAULT;
630 		goto out;
631 	}
632 	/*
633 	 * Next, align both the file and mem bss up to the page size,
634 	 * since this is where elf_bss was just zeroed up to, and where
635 	 * last_bss will end after the vm_brk_flags() below.
636 	 */
637 	elf_bss = ELF_PAGEALIGN(elf_bss);
638 	last_bss = ELF_PAGEALIGN(last_bss);
639 	/* Finally, if there is still more bss to allocate, do it. */
640 	if (last_bss > elf_bss) {
641 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
642 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
643 		if (error)
644 			goto out;
645 	}
646 
647 	error = load_addr;
648 out:
649 	return error;
650 }
651 
652 /*
653  * These are the functions used to load ELF style executables and shared
654  * libraries.  There is no binary dependent code anywhere else.
655  */
656 
657 #ifndef STACK_RND_MASK
658 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
659 #endif
660 
661 static unsigned long randomize_stack_top(unsigned long stack_top)
662 {
663 	unsigned long random_variable = 0;
664 
665 	if ((current->flags & PF_RANDOMIZE) &&
666 		!(current->personality & ADDR_NO_RANDOMIZE)) {
667 		random_variable = get_random_long();
668 		random_variable &= STACK_RND_MASK;
669 		random_variable <<= PAGE_SHIFT;
670 	}
671 #ifdef CONFIG_STACK_GROWSUP
672 	return PAGE_ALIGN(stack_top) + random_variable;
673 #else
674 	return PAGE_ALIGN(stack_top) - random_variable;
675 #endif
676 }
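/*
 * Illustrative example: with PAGE_SHIFT == 12 the default STACK_RND_MASK
 * is 0x7ff, so up to 0x7ff << 12 bytes (just under 8MB) of random offset
 * is applied to the stack top when PF_RANDOMIZE is set.
 */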
677 
678 static int load_elf_binary(struct linux_binprm *bprm)
679 {
680 	struct file *interpreter = NULL; /* to shut gcc up */
681 	unsigned long load_addr = 0, load_bias = 0;
682 	int load_addr_set = 0;
683 	char *elf_interpreter = NULL;
684 	unsigned long error;
685 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
686 	unsigned long elf_bss, elf_brk;
687 	int bss_prot = 0;
688 	int retval, i;
689 	unsigned long elf_entry;
690 	unsigned long interp_load_addr = 0;
691 	unsigned long start_code, end_code, start_data, end_data;
692 	unsigned long reloc_func_desc __maybe_unused = 0;
693 	int executable_stack = EXSTACK_DEFAULT;
694 	struct pt_regs *regs = current_pt_regs();
695 	struct {
696 		struct elfhdr elf_ex;
697 		struct elfhdr interp_elf_ex;
698 	} *loc;
699 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
700 
701 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
702 	if (!loc) {
703 		retval = -ENOMEM;
704 		goto out_ret;
705 	}
706 
707 	/* Get the exec-header */
708 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
709 
710 	retval = -ENOEXEC;
711 	/* First of all, some simple consistency checks */
712 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
713 		goto out;
714 
715 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
716 		goto out;
717 	if (!elf_check_arch(&loc->elf_ex))
718 		goto out;
719 	if (!bprm->file->f_op->mmap)
720 		goto out;
721 
722 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
723 	if (!elf_phdata)
724 		goto out;
725 
726 	elf_ppnt = elf_phdata;
727 	elf_bss = 0;
728 	elf_brk = 0;
729 
730 	start_code = ~0UL;
731 	end_code = 0;
732 	start_data = 0;
733 	end_data = 0;
734 
735 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
736 		if (elf_ppnt->p_type == PT_INTERP) {
737 			/* This is the program interpreter used for
738 			 * shared libraries and dynamically linked
739 			 * executables; read its path from this segment.
740 			 */
741 			retval = -ENOEXEC;
742 			if (elf_ppnt->p_filesz > PATH_MAX ||
743 			    elf_ppnt->p_filesz < 2)
744 				goto out_free_ph;
745 
746 			retval = -ENOMEM;
747 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
748 						  GFP_KERNEL);
749 			if (!elf_interpreter)
750 				goto out_free_ph;
751 
752 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
753 					     elf_interpreter,
754 					     elf_ppnt->p_filesz);
755 			if (retval != elf_ppnt->p_filesz) {
756 				if (retval >= 0)
757 					retval = -EIO;
758 				goto out_free_interp;
759 			}
760 			/* make sure path is NULL terminated */
761 			retval = -ENOEXEC;
762 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
763 				goto out_free_interp;
764 
765 			interpreter = open_exec(elf_interpreter);
766 			retval = PTR_ERR(interpreter);
767 			if (IS_ERR(interpreter))
768 				goto out_free_interp;
769 
770 			/*
771 			 * If the binary is not readable then enforce
772 			 * mm->dumpable = 0 regardless of the interpreter's
773 			 * permissions.
774 			 */
775 			would_dump(bprm, interpreter);
776 
777 			/* Get the exec headers */
778 			retval = kernel_read(interpreter, 0,
779 					     (void *)&loc->interp_elf_ex,
780 					     sizeof(loc->interp_elf_ex));
781 			if (retval != sizeof(loc->interp_elf_ex)) {
782 				if (retval >= 0)
783 					retval = -EIO;
784 				goto out_free_dentry;
785 			}
786 
787 			break;
788 		}
789 		elf_ppnt++;
790 	}
791 
792 	elf_ppnt = elf_phdata;
793 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
794 		switch (elf_ppnt->p_type) {
795 		case PT_GNU_STACK:
796 			if (elf_ppnt->p_flags & PF_X)
797 				executable_stack = EXSTACK_ENABLE_X;
798 			else
799 				executable_stack = EXSTACK_DISABLE_X;
800 			break;
801 
802 		case PT_LOPROC ... PT_HIPROC:
803 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
804 						  bprm->file, false,
805 						  &arch_state);
806 			if (retval)
807 				goto out_free_dentry;
808 			break;
809 		}
810 
811 	/* Some simple consistency checks for the interpreter */
812 	if (elf_interpreter) {
813 		retval = -ELIBBAD;
814 		/* Not an ELF interpreter */
815 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
816 			goto out_free_dentry;
817 		/* Verify the interpreter has a valid arch */
818 		if (!elf_check_arch(&loc->interp_elf_ex))
819 			goto out_free_dentry;
820 
821 		/* Load the interpreter program headers */
822 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
823 						   interpreter);
824 		if (!interp_elf_phdata)
825 			goto out_free_dentry;
826 
827 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
828 		elf_ppnt = interp_elf_phdata;
829 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
830 			switch (elf_ppnt->p_type) {
831 			case PT_LOPROC ... PT_HIPROC:
832 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
833 							  elf_ppnt, interpreter,
834 							  true, &arch_state);
835 				if (retval)
836 					goto out_free_dentry;
837 				break;
838 			}
839 	}
840 
841 	/*
842 	 * Allow arch code to reject the ELF at this point, whilst it's
843 	 * still possible to return an error to the code that invoked
844 	 * the exec syscall.
845 	 */
846 	retval = arch_check_elf(&loc->elf_ex,
847 				!!interpreter, &loc->interp_elf_ex,
848 				&arch_state);
849 	if (retval)
850 		goto out_free_dentry;
851 
852 	/* Flush all traces of the currently running executable */
853 	retval = flush_old_exec(bprm);
854 	if (retval)
855 		goto out_free_dentry;
856 
857 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
858 	   may depend on the personality.  */
859 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
860 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
861 		current->personality |= READ_IMPLIES_EXEC;
862 
863 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
864 		current->flags |= PF_RANDOMIZE;
865 
866 	setup_new_exec(bprm);
867 	install_exec_creds(bprm);
868 
869 	/* Do this so that we can load the interpreter, if need be.  We will
870 	   change some of these later */
871 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
872 				 executable_stack);
873 	if (retval < 0)
874 		goto out_free_dentry;
875 
876 	current->mm->start_stack = bprm->p;
877 
878 	/* Now we do a little grungy work by mmapping the ELF image into
879 	   the correct location in memory. */
880 	for(i = 0, elf_ppnt = elf_phdata;
881 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
882 		int elf_prot = 0, elf_flags;
883 		unsigned long k, vaddr;
884 		unsigned long total_size = 0;
885 
886 		if (elf_ppnt->p_type != PT_LOAD)
887 			continue;
888 
889 		if (unlikely (elf_brk > elf_bss)) {
890 			unsigned long nbyte;
891 
892 			/* There was a PT_LOAD segment with p_memsz > p_filesz
893 			   before this one. Map anonymous pages, if needed,
894 			   and clear the area.  */
895 			retval = set_brk(elf_bss + load_bias,
896 					 elf_brk + load_bias,
897 					 bss_prot);
898 			if (retval)
899 				goto out_free_dentry;
900 			nbyte = ELF_PAGEOFFSET(elf_bss);
901 			if (nbyte) {
902 				nbyte = ELF_MIN_ALIGN - nbyte;
903 				if (nbyte > elf_brk - elf_bss)
904 					nbyte = elf_brk - elf_bss;
905 				if (clear_user((void __user *)elf_bss +
906 							load_bias, nbyte)) {
907 					/*
908 					 * This bss-zeroing can fail if the ELF
909 					 * file specifies odd protections. So
910 					 * we don't check the return value
911 					 */
912 				}
913 			}
914 		}
915 
916 		if (elf_ppnt->p_flags & PF_R)
917 			elf_prot |= PROT_READ;
918 		if (elf_ppnt->p_flags & PF_W)
919 			elf_prot |= PROT_WRITE;
920 		if (elf_ppnt->p_flags & PF_X)
921 			elf_prot |= PROT_EXEC;
922 
923 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
924 
925 		vaddr = elf_ppnt->p_vaddr;
926 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
927 			elf_flags |= MAP_FIXED;
928 		} else if (loc->elf_ex.e_type == ET_DYN) {
929 			/* Try and get dynamic programs out of the way of the
930 			 * default mmap base, as well as whatever program they
931 			 * might try to exec.  This is because the brk will
932 			 * follow the loader, and is not movable.  */
933 			load_bias = ELF_ET_DYN_BASE - vaddr;
934 			if (current->flags & PF_RANDOMIZE)
935 				load_bias += arch_mmap_rnd();
936 			load_bias = ELF_PAGESTART(load_bias);
937 			total_size = total_mapping_size(elf_phdata,
938 							loc->elf_ex.e_phnum);
939 			if (!total_size) {
940 				retval = -EINVAL;
941 				goto out_free_dentry;
942 			}
943 		}
944 
945 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
946 				elf_prot, elf_flags, total_size);
947 		if (BAD_ADDR(error)) {
948 			retval = IS_ERR((void *)error) ?
949 				PTR_ERR((void*)error) : -EINVAL;
950 			goto out_free_dentry;
951 		}
952 
953 		if (!load_addr_set) {
954 			load_addr_set = 1;
955 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
956 			if (loc->elf_ex.e_type == ET_DYN) {
957 				load_bias += error -
958 				             ELF_PAGESTART(load_bias + vaddr);
959 				load_addr += load_bias;
960 				reloc_func_desc = load_bias;
961 			}
962 		}
963 		k = elf_ppnt->p_vaddr;
964 		if (k < start_code)
965 			start_code = k;
966 		if (start_data < k)
967 			start_data = k;
968 
969 		/*
970 		 * Check to see if the section's size will overflow the
971 		 * allowed task size. Note that p_filesz must always be
972 		 * <= p_memsz so it is only necessary to check p_memsz.
973 		 */
974 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
975 		    elf_ppnt->p_memsz > TASK_SIZE ||
976 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
977 			/* set_brk can never work. Avoid overflows. */
978 			retval = -EINVAL;
979 			goto out_free_dentry;
980 		}
981 
982 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
983 
984 		if (k > elf_bss)
985 			elf_bss = k;
986 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
987 			end_code = k;
988 		if (end_data < k)
989 			end_data = k;
990 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
991 		if (k > elf_brk) {
992 			bss_prot = elf_prot;
993 			elf_brk = k;
994 		}
995 	}
996 
997 	loc->elf_ex.e_entry += load_bias;
998 	elf_bss += load_bias;
999 	elf_brk += load_bias;
1000 	start_code += load_bias;
1001 	end_code += load_bias;
1002 	start_data += load_bias;
1003 	end_data += load_bias;
1004 
1005 	/* Calling set_brk effectively mmaps the pages that we need
1006 	 * for the bss and break sections.  We must do this before
1007 	 * mapping in the interpreter, to make sure it doesn't wind
1008 	 * up getting placed where the bss needs to go.
1009 	 */
1010 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1011 	if (retval)
1012 		goto out_free_dentry;
1013 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1014 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1015 		goto out_free_dentry;
1016 	}
1017 
1018 	if (elf_interpreter) {
1019 		unsigned long interp_map_addr = 0;
1020 
1021 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1022 					    interpreter,
1023 					    &interp_map_addr,
1024 					    load_bias, interp_elf_phdata);
1025 		if (!IS_ERR((void *)elf_entry)) {
1026 			/*
1027 			 * load_elf_interp() returns relocation
1028 			 * adjustment
1029 			 */
1030 			interp_load_addr = elf_entry;
1031 			elf_entry += loc->interp_elf_ex.e_entry;
1032 		}
1033 		if (BAD_ADDR(elf_entry)) {
1034 			retval = IS_ERR((void *)elf_entry) ?
1035 					(int)elf_entry : -EINVAL;
1036 			goto out_free_dentry;
1037 		}
1038 		reloc_func_desc = interp_load_addr;
1039 
1040 		allow_write_access(interpreter);
1041 		fput(interpreter);
1042 		kfree(elf_interpreter);
1043 	} else {
1044 		elf_entry = loc->elf_ex.e_entry;
1045 		if (BAD_ADDR(elf_entry)) {
1046 			retval = -EINVAL;
1047 			goto out_free_dentry;
1048 		}
1049 	}
1050 
1051 	kfree(interp_elf_phdata);
1052 	kfree(elf_phdata);
1053 
1054 	set_binfmt(&elf_format);
1055 
1056 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1057 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1058 	if (retval < 0)
1059 		goto out;
1060 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1061 
1062 	retval = create_elf_tables(bprm, &loc->elf_ex,
1063 			  load_addr, interp_load_addr);
1064 	if (retval < 0)
1065 		goto out;
1066 	/* N.B. passed_fileno might not be initialized? */
1067 	current->mm->end_code = end_code;
1068 	current->mm->start_code = start_code;
1069 	current->mm->start_data = start_data;
1070 	current->mm->end_data = end_data;
1071 	current->mm->start_stack = bprm->p;
1072 
1073 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1074 		current->mm->brk = current->mm->start_brk =
1075 			arch_randomize_brk(current->mm);
1076 #ifdef compat_brk_randomized
1077 		current->brk_randomized = 1;
1078 #endif
1079 	}
1080 
1081 	if (current->personality & MMAP_PAGE_ZERO) {
1082 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1083 		   and some applications "depend" upon this behavior.
1084 		   Since we do not have the power to recompile these, we
1085 		   emulate the SVr4 behavior. Sigh. */
1086 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1087 				MAP_FIXED | MAP_PRIVATE, 0);
1088 	}
1089 
1090 #ifdef ELF_PLAT_INIT
1091 	/*
1092 	 * The ABI may specify that certain registers be set up in special
1093 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1094 	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
1095 	 * that the e_entry field is the address of the function descriptor
1096 	 * for the startup routine, rather than the address of the startup
1097 	 * routine itself.  This macro performs whatever initialization to
1098 	 * the regs structure is required as well as any relocations to the
1099 	 * function descriptor entries when executing dynamically linked apps.
1100 	 */
1101 	ELF_PLAT_INIT(regs, reloc_func_desc);
1102 #endif
1103 
1104 	start_thread(regs, elf_entry, bprm->p);
1105 	retval = 0;
1106 out:
1107 	kfree(loc);
1108 out_ret:
1109 	return retval;
1110 
1111 	/* error cleanup */
1112 out_free_dentry:
1113 	kfree(interp_elf_phdata);
1114 	allow_write_access(interpreter);
1115 	if (interpreter)
1116 		fput(interpreter);
1117 out_free_interp:
1118 	kfree(elf_interpreter);
1119 out_free_ph:
1120 	kfree(elf_phdata);
1121 	goto out;
1122 }
1123 
1124 #ifdef CONFIG_USELIB
1125 /* This is really simpleminded and specialized - we are loading a
1126    library that has an ELF header. */
1127 static int load_elf_library(struct file *file)
1128 {
1129 	struct elf_phdr *elf_phdata;
1130 	struct elf_phdr *eppnt;
1131 	unsigned long elf_bss, bss, len;
1132 	int retval, error, i, j;
1133 	struct elfhdr elf_ex;
1134 
1135 	error = -ENOEXEC;
1136 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1137 	if (retval != sizeof(elf_ex))
1138 		goto out;
1139 
1140 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1141 		goto out;
1142 
1143 	/* First of all, some simple consistency checks */
1144 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1145 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1146 		goto out;
1147 
1148 	/* Now read in all of the header information */
1149 
1150 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1151 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1152 
1153 	error = -ENOMEM;
1154 	elf_phdata = kmalloc(j, GFP_KERNEL);
1155 	if (!elf_phdata)
1156 		goto out;
1157 
1158 	eppnt = elf_phdata;
1159 	error = -ENOEXEC;
1160 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1161 	if (retval != j)
1162 		goto out_free_ph;
1163 
1164 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1165 		if ((eppnt + i)->p_type == PT_LOAD)
1166 			j++;
1167 	if (j != 1)
1168 		goto out_free_ph;
1169 
1170 	while (eppnt->p_type != PT_LOAD)
1171 		eppnt++;
1172 
1173 	/* Now use mmap to map the library into memory. */
1174 	error = vm_mmap(file,
1175 			ELF_PAGESTART(eppnt->p_vaddr),
1176 			(eppnt->p_filesz +
1177 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1178 			PROT_READ | PROT_WRITE | PROT_EXEC,
1179 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1180 			(eppnt->p_offset -
1181 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1182 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1183 		goto out_free_ph;
1184 
1185 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1186 	if (padzero(elf_bss)) {
1187 		error = -EFAULT;
1188 		goto out_free_ph;
1189 	}
1190 
1191 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1192 			    ELF_MIN_ALIGN - 1);
1193 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1194 	if (bss > len) {
1195 		error = vm_brk(len, bss - len);
1196 		if (error)
1197 			goto out_free_ph;
1198 	}
1199 	error = 0;
1200 
1201 out_free_ph:
1202 	kfree(elf_phdata);
1203 out:
1204 	return error;
1205 }
1206 #endif /* #ifdef CONFIG_USELIB */
1207 
1208 #ifdef CONFIG_ELF_CORE
1209 /*
1210  * ELF core dumper
1211  *
1212  * Modelled on fs/exec.c:aout_core_dump()
1213  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1214  */
1215 
1216 /*
1217  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1218  * that are useful for post-mortem analysis are included in every core dump.
1219  * In that way we ensure that the core dump is fully interpretable later
1220  * without matching up the same kernel and hardware config to see what PC values
1221  * meant. These special mappings include - vDSO, vsyscall, and other
1222  * meant. These special mappings include the vDSO, vsyscall, and other
1223  * architecture-specific mappings.
1224 static bool always_dump_vma(struct vm_area_struct *vma)
1225 {
1226 	/* Any vsyscall mappings? */
1227 	if (vma == get_gate_vma(vma->vm_mm))
1228 		return true;
1229 
1230 	/*
1231 	 * Assume that all vmas with a .name op should always be dumped.
1232 	 * If this changes, a new vm_ops field can easily be added.
1233 	 */
1234 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1235 		return true;
1236 
1237 	/*
1238 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1239 	 * such as vDSO sections.
1240 	 */
1241 	if (arch_vma_name(vma))
1242 		return true;
1243 
1244 	return false;
1245 }
1246 
1247 /*
1248  * Decide how much of a segment to dump: all of it, part of it, or none.
1249  */
1250 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1251 				   unsigned long mm_flags)
1252 {
1253 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1254 
1255 	/* always dump the vdso and vsyscall sections */
1256 	if (always_dump_vma(vma))
1257 		goto whole;
1258 
1259 	if (vma->vm_flags & VM_DONTDUMP)
1260 		return 0;
1261 
1262 	/* support for DAX */
1263 	if (vma_is_dax(vma)) {
1264 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1265 			goto whole;
1266 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1267 			goto whole;
1268 		return 0;
1269 	}
1270 
1271 	/* Hugetlb memory check */
1272 	if (vma->vm_flags & VM_HUGETLB) {
1273 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1274 			goto whole;
1275 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1276 			goto whole;
1277 		return 0;
1278 	}
1279 
1280 	/* Do not dump I/O mapped devices or special mappings */
1281 	if (vma->vm_flags & VM_IO)
1282 		return 0;
1283 
1284 	/* By default, dump shared memory if mapped from an anonymous file. */
1285 	if (vma->vm_flags & VM_SHARED) {
1286 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1287 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1288 			goto whole;
1289 		return 0;
1290 	}
1291 
1292 	/* Dump segments that have been written to.  */
1293 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1294 		goto whole;
1295 	if (vma->vm_file == NULL)
1296 		return 0;
1297 
1298 	if (FILTER(MAPPED_PRIVATE))
1299 		goto whole;
1300 
1301 	/*
1302 	 * If this looks like the beginning of a DSO or executable mapping,
1303 	 * check for an ELF header.  If we find one, dump the first page to
1304 	 * aid in determining what was mapped here.
1305 	 */
1306 	if (FILTER(ELF_HEADERS) &&
1307 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1308 		u32 __user *header = (u32 __user *) vma->vm_start;
1309 		u32 word;
1310 		mm_segment_t fs = get_fs();
1311 		/*
1312 		 * Doing it this way gets the constant folded by GCC.
1313 		 */
1314 		union {
1315 			u32 cmp;
1316 			char elfmag[SELFMAG];
1317 		} magic;
1318 		BUILD_BUG_ON(SELFMAG != sizeof word);
1319 		magic.elfmag[EI_MAG0] = ELFMAG0;
1320 		magic.elfmag[EI_MAG1] = ELFMAG1;
1321 		magic.elfmag[EI_MAG2] = ELFMAG2;
1322 		magic.elfmag[EI_MAG3] = ELFMAG3;
1323 		/*
1324 		 * Switch to the user "segment" for get_user(),
1325 		 * then put back what elf_core_dump() had in place.
1326 		 */
1327 		set_fs(USER_DS);
1328 		if (unlikely(get_user(word, header)))
1329 			word = 0;
1330 		set_fs(fs);
1331 		if (word == magic.cmp)
1332 			return PAGE_SIZE;
1333 	}
1334 
1335 #undef	FILTER
1336 
1337 	return 0;
1338 
1339 whole:
1340 	return vma->vm_end - vma->vm_start;
1341 }
1342 
1343 /* An ELF note in memory */
1344 struct memelfnote
1345 {
1346 	const char *name;
1347 	int type;
1348 	unsigned int datasz;
1349 	void *data;
1350 };
1351 
1352 static int notesize(struct memelfnote *en)
1353 {
1354 	int sz;
1355 
1356 	sz = sizeof(struct elf_note);
1357 	sz += roundup(strlen(en->name) + 1, 4);
1358 	sz += roundup(en->datasz, 4);
1359 
1360 	return sz;
1361 }
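/*
 * Illustrative example: a "CORE" note whose payload is, say, 336 bytes
 * takes sizeof(struct elf_note) + roundup(strlen("CORE") + 1, 4) +
 * roundup(336, 4) = 12 + 8 + 336 = 356 bytes in the core file.
 */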
1362 
1363 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1364 {
1365 	struct elf_note en;
1366 	en.n_namesz = strlen(men->name) + 1;
1367 	en.n_descsz = men->datasz;
1368 	en.n_type = men->type;
1369 
1370 	return dump_emit(cprm, &en, sizeof(en)) &&
1371 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1372 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1373 }
1374 
1375 static void fill_elf_header(struct elfhdr *elf, int segs,
1376 			    u16 machine, u32 flags)
1377 {
1378 	memset(elf, 0, sizeof(*elf));
1379 
1380 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1381 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1382 	elf->e_ident[EI_DATA] = ELF_DATA;
1383 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1384 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1385 
1386 	elf->e_type = ET_CORE;
1387 	elf->e_machine = machine;
1388 	elf->e_version = EV_CURRENT;
1389 	elf->e_phoff = sizeof(struct elfhdr);
1390 	elf->e_flags = flags;
1391 	elf->e_ehsize = sizeof(struct elfhdr);
1392 	elf->e_phentsize = sizeof(struct elf_phdr);
1393 	elf->e_phnum = segs;
1394 
1395 	return;
1396 }
1397 
1398 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1399 {
1400 	phdr->p_type = PT_NOTE;
1401 	phdr->p_offset = offset;
1402 	phdr->p_vaddr = 0;
1403 	phdr->p_paddr = 0;
1404 	phdr->p_filesz = sz;
1405 	phdr->p_memsz = 0;
1406 	phdr->p_flags = 0;
1407 	phdr->p_align = 0;
1408 	return;
1409 }
1410 
1411 static void fill_note(struct memelfnote *note, const char *name, int type,
1412 		unsigned int sz, void *data)
1413 {
1414 	note->name = name;
1415 	note->type = type;
1416 	note->datasz = sz;
1417 	note->data = data;
1418 	return;
1419 }
1420 
1421 /*
1422  * fill up all the fields in prstatus from the given task struct, except
1423  * registers which need to be filled up separately.
1424  */
1425 static void fill_prstatus(struct elf_prstatus *prstatus,
1426 		struct task_struct *p, long signr)
1427 {
1428 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1429 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1430 	prstatus->pr_sighold = p->blocked.sig[0];
1431 	rcu_read_lock();
1432 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1433 	rcu_read_unlock();
1434 	prstatus->pr_pid = task_pid_vnr(p);
1435 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1436 	prstatus->pr_sid = task_session_vnr(p);
1437 	if (thread_group_leader(p)) {
1438 		struct task_cputime cputime;
1439 
1440 		/*
1441 		 * This is the record for the group leader.  It shows the
1442 		 * group-wide total, not its individual thread total.
1443 		 */
1444 		thread_group_cputime(p, &cputime);
1445 		prstatus->pr_utime = ns_to_timeval(cputime.utime);
1446 		prstatus->pr_stime = ns_to_timeval(cputime.stime);
1447 	} else {
1448 		u64 utime, stime;
1449 
1450 		task_cputime(p, &utime, &stime);
1451 		prstatus->pr_utime = ns_to_timeval(utime);
1452 		prstatus->pr_stime = ns_to_timeval(stime);
1453 	}
1454 
1455 	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1456 	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1457 }
1458 
1459 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1460 		       struct mm_struct *mm)
1461 {
1462 	const struct cred *cred;
1463 	unsigned int i, len;
1464 
1465 	/* first copy the parameters from user space */
1466 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1467 
1468 	len = mm->arg_end - mm->arg_start;
1469 	if (len >= ELF_PRARGSZ)
1470 		len = ELF_PRARGSZ-1;
1471 	if (copy_from_user(&psinfo->pr_psargs,
1472 		           (const char __user *)mm->arg_start, len))
1473 		return -EFAULT;
1474 	for(i = 0; i < len; i++)
1475 		if (psinfo->pr_psargs[i] == 0)
1476 			psinfo->pr_psargs[i] = ' ';
1477 	psinfo->pr_psargs[len] = 0;
1478 
1479 	rcu_read_lock();
1480 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1481 	rcu_read_unlock();
1482 	psinfo->pr_pid = task_pid_vnr(p);
1483 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1484 	psinfo->pr_sid = task_session_vnr(p);
1485 
1486 	i = p->state ? ffz(~p->state) + 1 : 0;
1487 	psinfo->pr_state = i;
1488 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1489 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1490 	psinfo->pr_nice = task_nice(p);
1491 	psinfo->pr_flag = p->flags;
1492 	rcu_read_lock();
1493 	cred = __task_cred(p);
1494 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1495 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1496 	rcu_read_unlock();
1497 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1498 
1499 	return 0;
1500 }
1501 
1502 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1503 {
1504 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1505 	int i = 0;
1506 	do
1507 		i += 2;
1508 	while (auxv[i - 2] != AT_NULL);
1509 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1510 }
1511 
1512 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1513 		const siginfo_t *siginfo)
1514 {
1515 	mm_segment_t old_fs = get_fs();
1516 	set_fs(KERNEL_DS);
1517 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1518 	set_fs(old_fs);
1519 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1520 }
1521 
1522 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1523 /*
1524  * Format of NT_FILE note:
1525  *
1526  * long count     -- how many files are mapped
1527  * long page_size -- units for file_ofs
1528  * array of [COUNT] elements of
1529  *   long start
1530  *   long end
1531  *   long file_ofs
1532  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1533  */
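/*
 * Illustrative layout for two mapped files (all values hypothetical):
 *   data[0] = 2            count
 *   data[1] = 4096         page_size
 *   data[2..4]             start, end, file_ofs of the first mapping
 *   data[5..7]             start, end, file_ofs of the second mapping
 * followed by the two path strings, each NUL terminated.
 */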
1534 static int fill_files_note(struct memelfnote *note)
1535 {
1536 	struct vm_area_struct *vma;
1537 	unsigned count, size, names_ofs, remaining, n;
1538 	user_long_t *data;
1539 	user_long_t *start_end_ofs;
1540 	char *name_base, *name_curpos;
1541 
1542 	/* *Estimated* file count and total data size needed */
1543 	count = current->mm->map_count;
1544 	size = count * 64;
1545 
1546 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1547  alloc:
1548 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1549 		return -EINVAL;
1550 	size = round_up(size, PAGE_SIZE);
1551 	data = vmalloc(size);
1552 	if (!data)
1553 		return -ENOMEM;
1554 
1555 	start_end_ofs = data + 2;
1556 	name_base = name_curpos = ((char *)data) + names_ofs;
1557 	remaining = size - names_ofs;
1558 	count = 0;
1559 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1560 		struct file *file;
1561 		const char *filename;
1562 
1563 		file = vma->vm_file;
1564 		if (!file)
1565 			continue;
1566 		filename = file_path(file, name_curpos, remaining);
1567 		if (IS_ERR(filename)) {
1568 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1569 				vfree(data);
1570 				size = size * 5 / 4;
1571 				goto alloc;
1572 			}
1573 			continue;
1574 		}
1575 
1576 		/* file_path() fills at the end, move name down */
1577 		/* n = strlen(filename) + 1: */
1578 		n = (name_curpos + remaining) - filename;
1579 		remaining = filename - name_curpos;
1580 		memmove(name_curpos, filename, n);
1581 		name_curpos += n;
1582 
1583 		*start_end_ofs++ = vma->vm_start;
1584 		*start_end_ofs++ = vma->vm_end;
1585 		*start_end_ofs++ = vma->vm_pgoff;
1586 		count++;
1587 	}
1588 
1589 	/* Now that we know the exact count of files, we can store it */
1590 	data[0] = count;
1591 	data[1] = PAGE_SIZE;
1592 	/*
1593 	 * The count is usually less than current->mm->map_count,
1594 	 * so we need to move the filenames down.
1595 	 */
1596 	n = current->mm->map_count - count;
1597 	if (n != 0) {
1598 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1599 		memmove(name_base - shift_bytes, name_base,
1600 			name_curpos - name_base);
1601 		name_curpos -= shift_bytes;
1602 	}
1603 
1604 	size = name_curpos - (char *)data;
1605 	fill_note(note, "CORE", NT_FILE, size, data);
1606 	return 0;
1607 }
1608 
1609 #ifdef CORE_DUMP_USE_REGSET
1610 #include <linux/regset.h>
1611 
1612 struct elf_thread_core_info {
1613 	struct elf_thread_core_info *next;
1614 	struct task_struct *task;
1615 	struct elf_prstatus prstatus;
1616 	struct memelfnote notes[0];
1617 };
1618 
1619 struct elf_note_info {
1620 	struct elf_thread_core_info *thread;
1621 	struct memelfnote psinfo;
1622 	struct memelfnote signote;
1623 	struct memelfnote auxv;
1624 	struct memelfnote files;
1625 	user_siginfo_t csigdata;
1626 	size_t size;
1627 	int thread_notes;
1628 };
1629 
1630 /*
1631  * When a regset has a writeback hook, we call it on each thread before
1632  * dumping user memory.  On register window machines, this makes sure the
1633  * user memory backing the register data is up to date before we read it.
1634  */
1635 static void do_thread_regset_writeback(struct task_struct *task,
1636 				       const struct user_regset *regset)
1637 {
1638 	if (regset->writeback)
1639 		regset->writeback(task, regset, 1);
1640 }
1641 
1642 #ifndef PRSTATUS_SIZE
1643 #define PRSTATUS_SIZE(S, R) sizeof(S)
1644 #endif
1645 
1646 #ifndef SET_PR_FPVALID
1647 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1648 #endif
1649 
1650 static int fill_thread_core_info(struct elf_thread_core_info *t,
1651 				 const struct user_regset_view *view,
1652 				 long signr, size_t *total)
1653 {
1654 	unsigned int i;
1655 	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1656 
1657 	/*
1658 	 * NT_PRSTATUS is the one special case, because the regset data
1659 	 * goes into the pr_reg field inside the note contents, rather
1660 	 * than being the whole note contents.  We fill the rest in here.
1661 	 * We assume that regset 0 is NT_PRSTATUS.
1662 	 */
1663 	fill_prstatus(&t->prstatus, t->task, signr);
1664 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1665 				    &t->prstatus.pr_reg, NULL);
1666 
1667 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1668 		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1669 	*total += notesize(&t->notes[0]);
1670 
1671 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1672 
1673 	/*
1674 	 * Each other regset might generate a note too.  For each regset
1675 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1676 	 * all zero and we'll know to skip writing it later.
1677 	 */
1678 	for (i = 1; i < view->n; ++i) {
1679 		const struct user_regset *regset = &view->regsets[i];
1680 		do_thread_regset_writeback(t->task, regset);
1681 		if (regset->core_note_type && regset->get &&
1682 		    (!regset->active || regset->active(t->task, regset))) {
1683 			int ret;
1684 			size_t size = regset->n * regset->size;
1685 			void *data = kmalloc(size, GFP_KERNEL);
1686 			if (unlikely(!data))
1687 				return 0;
1688 			ret = regset->get(t->task, regset,
1689 					  0, size, data, NULL);
1690 			if (unlikely(ret))
1691 				kfree(data);
1692 			else {
1693 				if (regset->core_note_type != NT_PRFPREG)
1694 					fill_note(&t->notes[i], "LINUX",
1695 						  regset->core_note_type,
1696 						  size, data);
1697 				else {
1698 					SET_PR_FPVALID(&t->prstatus,
1699 							1, regset_size);
1700 					fill_note(&t->notes[i], "CORE",
1701 						  NT_PRFPREG, size, data);
1702 				}
1703 				*total += notesize(&t->notes[i]);
1704 			}
1705 		}
1706 	}
1707 
1708 	return 1;
1709 }
1710 
1711 static int fill_note_info(struct elfhdr *elf, int phdrs,
1712 			  struct elf_note_info *info,
1713 			  const siginfo_t *siginfo, struct pt_regs *regs)
1714 {
1715 	struct task_struct *dump_task = current;
1716 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1717 	struct elf_thread_core_info *t;
1718 	struct elf_prpsinfo *psinfo;
1719 	struct core_thread *ct;
1720 	unsigned int i;
1721 
1722 	info->size = 0;
1723 	info->thread = NULL;
1724 
1725 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1726 	if (psinfo == NULL) {
1727 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1728 		return 0;
1729 	}
1730 
1731 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1732 
1733 	/*
1734 	 * Figure out how many notes we're going to need for each thread.
1735 	 */
1736 	info->thread_notes = 0;
1737 	for (i = 0; i < view->n; ++i)
1738 		if (view->regsets[i].core_note_type != 0)
1739 			++info->thread_notes;
1740 
1741 	/*
1742 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1743 	 * since it is our one special case.
1744 	 */
1745 	if (unlikely(info->thread_notes == 0) ||
1746 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1747 		WARN_ON(1);
1748 		return 0;
1749 	}
1750 
1751 	/*
1752 	 * Initialize the ELF file header.
1753 	 */
1754 	fill_elf_header(elf, phdrs,
1755 			view->e_machine, view->e_flags);
1756 
1757 	/*
1758 	 * Allocate a structure for each thread.
1759 	 */
1760 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1761 		t = kzalloc(offsetof(struct elf_thread_core_info,
1762 				     notes[info->thread_notes]),
1763 			    GFP_KERNEL);
1764 		if (unlikely(!t))
1765 			return 0;
1766 
1767 		t->task = ct->task;
1768 		if (ct->task == dump_task || !info->thread) {
1769 			t->next = info->thread;
1770 			info->thread = t;
1771 		} else {
1772 			/*
1773 			 * Make sure to keep the original task at
1774 			 * the head of the list.
1775 			 */
1776 			t->next = info->thread->next;
1777 			info->thread->next = t;
1778 		}
1779 	}
1780 
1781 	/*
1782 	 * Now fill in each thread's information.
1783 	 */
1784 	for (t = info->thread; t != NULL; t = t->next)
1785 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1786 			return 0;
1787 
1788 	/*
1789 	 * Fill in the two process-wide notes.
1790 	 */
1791 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1792 	info->size += notesize(&info->psinfo);
1793 
1794 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1795 	info->size += notesize(&info->signote);
1796 
1797 	fill_auxv_note(&info->auxv, current->mm);
1798 	info->size += notesize(&info->auxv);
1799 
1800 	if (fill_files_note(&info->files) == 0)
1801 		info->size += notesize(&info->files);
1802 
1803 	return 1;
1804 }
1805 
1806 static size_t get_note_info_size(struct elf_note_info *info)
1807 {
1808 	return info->size;
1809 }
1810 
1811 /*
1812  * Write all the notes for each thread.  When writing the first thread, the
1813  * process-wide notes are interleaved after the first thread-specific note.
1814  */
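/*
 * Illustrative ordering only, derived from the loop below: for threads T0
 * and T1 with N regset notes per thread, the note segment comes out roughly
 * as
 *
 *   T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *   any present T0 notes[1..N-1], T1 NT_PRSTATUS, any present T1 notes[1..N-1]
 */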
1815 static int write_note_info(struct elf_note_info *info,
1816 			   struct coredump_params *cprm)
1817 {
1818 	bool first = true;
1819 	struct elf_thread_core_info *t = info->thread;
1820 
1821 	do {
1822 		int i;
1823 
1824 		if (!writenote(&t->notes[0], cprm))
1825 			return 0;
1826 
1827 		if (first && !writenote(&info->psinfo, cprm))
1828 			return 0;
1829 		if (first && !writenote(&info->signote, cprm))
1830 			return 0;
1831 		if (first && !writenote(&info->auxv, cprm))
1832 			return 0;
1833 		if (first && info->files.data &&
1834 				!writenote(&info->files, cprm))
1835 			return 0;
1836 
1837 		for (i = 1; i < info->thread_notes; ++i)
1838 			if (t->notes[i].data &&
1839 			    !writenote(&t->notes[i], cprm))
1840 				return 0;
1841 
1842 		first = false;
1843 		t = t->next;
1844 	} while (t);
1845 
1846 	return 1;
1847 }
1848 
1849 static void free_note_info(struct elf_note_info *info)
1850 {
1851 	struct elf_thread_core_info *threads = info->thread;
1852 	while (threads) {
1853 		unsigned int i;
1854 		struct elf_thread_core_info *t = threads;
1855 		threads = t->next;
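		/*
		 * notes[0] points at the prstatus embedded in *t (see the
		 * WARN_ON below); only notes[1..] carry kmalloc'ed regset
		 * data, so only those are freed individually.
		 */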
1856 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1857 		for (i = 1; i < info->thread_notes; ++i)
1858 			kfree(t->notes[i].data);
1859 		kfree(t);
1860 	}
1861 	kfree(info->psinfo.data);
1862 	vfree(info->files.data);
1863 }
1864 
1865 #else
1866 
1867 /* Here is the structure in which status of each thread is captured. */
1868 struct elf_thread_status
1869 {
1870 	struct list_head list;
1871 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1872 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1873 	struct task_struct *thread;
1874 #ifdef ELF_CORE_COPY_XFPREGS
1875 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1876 #endif
1877 	struct memelfnote notes[3];
1878 	int num_notes;
1879 };
1880 
1881 /*
1882  * In order to add the specific thread information for the elf file format,
1883  * we need to keep a linked list of every thread's pr_status and then create
1884  * a single section for them in the final core file.
1885  */
1886 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1887 {
1888 	int sz = 0;
1889 	struct task_struct *p = t->thread;
1890 	t->num_notes = 0;
1891 
1892 	fill_prstatus(&t->prstatus, p, signr);
1893 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1894 
1895 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1896 		  &(t->prstatus));
1897 	t->num_notes++;
1898 	sz += notesize(&t->notes[0]);
1899 
1900 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1901 								&t->fpu))) {
1902 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1903 			  &(t->fpu));
1904 		t->num_notes++;
1905 		sz += notesize(&t->notes[1]);
1906 	}
1907 
1908 #ifdef ELF_CORE_COPY_XFPREGS
1909 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1910 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1911 			  sizeof(t->xfpu), &t->xfpu);
1912 		t->num_notes++;
1913 		sz += notesize(&t->notes[2]);
1914 	}
1915 #endif
1916 	return sz;
1917 }
1918 
1919 struct elf_note_info {
1920 	struct memelfnote *notes;
1921 	struct memelfnote *notes_files;
1922 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1923 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1924 	struct list_head thread_list;
1925 	elf_fpregset_t *fpu;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 	elf_fpxregset_t *xfpu;
1928 #endif
1929 	user_siginfo_t csigdata;
1930 	int thread_status_size;
1931 	int numnote;
1932 };
1933 
1934 static int elf_note_info_init(struct elf_note_info *info)
1935 {
1936 	memset(info, 0, sizeof(*info));
1937 	INIT_LIST_HEAD(&info->thread_list);
1938 
1939 	/* Allocate space for ELF notes */
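	/*
	 * 8 slots is enough for everything filled in below: prstatus, psinfo,
	 * siginfo, auxv, the optional files note, fpregs and (if configured)
	 * xfpregs use at most 7 of them.
	 */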
1940 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1941 	if (!info->notes)
1942 		return 0;
1943 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1944 	if (!info->psinfo)
1945 		return 0;
1946 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1947 	if (!info->prstatus)
1948 		return 0;
1949 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1950 	if (!info->fpu)
1951 		return 0;
1952 #ifdef ELF_CORE_COPY_XFPREGS
1953 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1954 	if (!info->xfpu)
1955 		return 0;
1956 #endif
1957 	return 1;
1958 }
1959 
1960 static int fill_note_info(struct elfhdr *elf, int phdrs,
1961 			  struct elf_note_info *info,
1962 			  const siginfo_t *siginfo, struct pt_regs *regs)
1963 {
1964 	struct list_head *t;
1965 	struct core_thread *ct;
1966 	struct elf_thread_status *ets;
1967 
1968 	if (!elf_note_info_init(info))
1969 		return 0;
1970 
1971 	for (ct = current->mm->core_state->dumper.next;
1972 					ct; ct = ct->next) {
1973 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1974 		if (!ets)
1975 			return 0;
1976 
1977 		ets->thread = ct->task;
1978 		list_add(&ets->list, &info->thread_list);
1979 	}
1980 
1981 	list_for_each(t, &info->thread_list) {
1982 		int sz;
1983 
1984 		ets = list_entry(t, struct elf_thread_status, list);
1985 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1986 		info->thread_status_size += sz;
1987 	}
1988 	/* now collect the dump for the current task */
1989 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1990 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1991 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1992 
1993 	/* Set up header */
1994 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1995 
1996 	/*
1997 	 * Set up the notes in similar form to SVR4 core dumps made
1998 	 * with info from their /proc.
1999 	 */
2000 
2001 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2002 		  sizeof(*info->prstatus), info->prstatus);
2003 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2004 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2005 		  sizeof(*info->psinfo), info->psinfo);
2006 
2007 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2008 	fill_auxv_note(info->notes + 3, current->mm);
2009 	info->numnote = 4;
2010 
2011 	if (fill_files_note(info->notes + info->numnote) == 0) {
2012 		info->notes_files = info->notes + info->numnote;
2013 		info->numnote++;
2014 	}
2015 
2016 	/* Try to dump the FPU. */
2017 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2018 							       info->fpu);
2019 	if (info->prstatus->pr_fpvalid)
2020 		fill_note(info->notes + info->numnote++,
2021 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2022 #ifdef ELF_CORE_COPY_XFPREGS
2023 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2024 		fill_note(info->notes + info->numnote++,
2025 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2026 			  sizeof(*info->xfpu), info->xfpu);
2027 #endif
2028 
2029 	return 1;
2030 }
2031 
2032 static size_t get_note_info_size(struct elf_note_info *info)
2033 {
2034 	int sz = 0;
2035 	int i;
2036 
2037 	for (i = 0; i < info->numnote; i++)
2038 		sz += notesize(info->notes + i);
2039 
2040 	sz += info->thread_status_size;
2041 
2042 	return sz;
2043 }
2044 
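/*
 * Non-regset fallback: emit the notes collected for the dumping task
 * (prstatus, psinfo, siginfo, auxv, then the optional files/FPU notes)
 * first, followed by each other thread's status notes in list order.
 */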
2045 static int write_note_info(struct elf_note_info *info,
2046 			   struct coredump_params *cprm)
2047 {
2048 	int i;
2049 	struct list_head *t;
2050 
2051 	for (i = 0; i < info->numnote; i++)
2052 		if (!writenote(info->notes + i, cprm))
2053 			return 0;
2054 
2055 	/* write out the thread status notes section */
2056 	list_for_each(t, &info->thread_list) {
2057 		struct elf_thread_status *tmp =
2058 				list_entry(t, struct elf_thread_status, list);
2059 
2060 		for (i = 0; i < tmp->num_notes; i++)
2061 			if (!writenote(&tmp->notes[i], cprm))
2062 				return 0;
2063 	}
2064 
2065 	return 1;
2066 }
2067 
2068 static void free_note_info(struct elf_note_info *info)
2069 {
2070 	while (!list_empty(&info->thread_list)) {
2071 		struct list_head *tmp = info->thread_list.next;
2072 		list_del(tmp);
2073 		kfree(list_entry(tmp, struct elf_thread_status, list));
2074 	}
2075 
2076 	/* Free data possibly allocated by fill_files_note(): */
2077 	if (info->notes_files)
2078 		vfree(info->notes_files->data);
2079 
2080 	kfree(info->prstatus);
2081 	kfree(info->psinfo);
2082 	kfree(info->notes);
2083 	kfree(info->fpu);
2084 #ifdef ELF_CORE_COPY_XFPREGS
2085 	kfree(info->xfpu);
2086 #endif
2087 }
2088 
2089 #endif
2090 
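/*
 * First vma to visit when dumping: the first real mapping if the mm has
 * any, otherwise fall back to the gate vma so it still gets dumped.
 */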
2091 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2092 					struct vm_area_struct *gate_vma)
2093 {
2094 	struct vm_area_struct *ret = tsk->mm->mmap;
2095 
2096 	if (ret)
2097 		return ret;
2098 	return gate_vma;
2099 }
2100 /*
2101  * Helper function for iterating across a vma list.  It ensures that the caller
2102  * will visit `gate_vma' prior to terminating the search.
2103  */
2104 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2105 					struct vm_area_struct *gate_vma)
2106 {
2107 	struct vm_area_struct *ret;
2108 
2109 	ret = this_vma->vm_next;
2110 	if (ret)
2111 		return ret;
2112 	if (this_vma == gate_vma)
2113 		return NULL;
2114 	return gate_vma;
2115 }
2116 
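/*
 * With more than PN_XNUM (0xffff) program headers, e_phnum cannot hold the
 * real count.  Record it via ELF extended numbering instead: a single
 * SHT_NULL section header is written at e_shoff and its sh_info carries the
 * true segment count.
 */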
2117 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2118 			     elf_addr_t e_shoff, int segs)
2119 {
2120 	elf->e_shoff = e_shoff;
2121 	elf->e_shentsize = sizeof(*shdr4extnum);
2122 	elf->e_shnum = 1;
2123 	elf->e_shstrndx = SHN_UNDEF;
2124 
2125 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2126 
2127 	shdr4extnum->sh_type = SHT_NULL;
2128 	shdr4extnum->sh_size = elf->e_shnum;
2129 	shdr4extnum->sh_link = elf->e_shstrndx;
2130 	shdr4extnum->sh_info = segs;
2131 }
2132 
2133 /*
2134  * Actual dumper
2135  *
2136  * This is a two-pass process; first we find the offsets of the bits,
2137  * and then they are actually written out.  If we hit the core file size
2138  * limit, we just truncate.
2139  */
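/*
 * Rough on-disk layout produced below (illustrative; the arch extras and the
 * extended-numbering section header are optional):
 *
 *   ELF header | program headers (PT_NOTE + one PT_LOAD per vma) |
 *   note data | padding up to ELF_EXEC_PAGESIZE | per-vma contents |
 *   extra arch data | extended-numbering section header (at e_shoff)
 */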
2140 static int elf_core_dump(struct coredump_params *cprm)
2141 {
2142 	int has_dumped = 0;
2143 	mm_segment_t fs;
2144 	int segs, i;
2145 	size_t vma_data_size = 0;
2146 	struct vm_area_struct *vma, *gate_vma;
2147 	struct elfhdr *elf = NULL;
2148 	loff_t offset = 0, dataoff;
2149 	struct elf_note_info info = { };
2150 	struct elf_phdr *phdr4note = NULL;
2151 	struct elf_shdr *shdr4extnum = NULL;
2152 	Elf_Half e_phnum;
2153 	elf_addr_t e_shoff;
2154 	elf_addr_t *vma_filesz = NULL;
2155 
2156 	/*
2157 	 * We no longer stop all VM operations.
2158 	 *
2159 	 * This is because those processes that could possibly change map_count
2160 	 * or the mmap / vma pages are now blocked in do_exit on current
2161 	 * finishing this core dump.
2162 	 *
2163 	 * Only ptrace can touch these memory addresses, but it doesn't change
2164 	 * the map_count or the pages allocated. So no possibility of crashing
2165 	 * exists while dumping the mm->vm_next areas to the core file.
2166 	 */
2167 
2168 	/* alloc memory for large data structures: too large to be on stack */
2169 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2170 	if (!elf)
2171 		goto out;
2172 	/*
2173 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2174 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2175 	 */
2176 	segs = current->mm->map_count;
2177 	segs += elf_core_extra_phdrs();
2178 
2179 	gate_vma = get_gate_vma(current->mm);
2180 	if (gate_vma != NULL)
2181 		segs++;
2182 
2183 	/* for notes section */
2184 	segs++;
2185 
2186 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2187 	 * this, the kernel supports extended numbering. Have a look at
2188 	 * include/linux/elf.h for further information. */
2189 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2190 
2191 	/*
2192 	 * Collect all the non-memory information about the process for the
2193 	 * notes.  This also sets up the file header.
2194 	 */
2195 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2196 		goto cleanup;
2197 
2198 	has_dumped = 1;
2199 
2200 	fs = get_fs();
2201 	set_fs(KERNEL_DS);
2202 
2203 	offset += sizeof(*elf);				/* Elf header */
2204 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2205 
2206 	/* Write notes phdr entry */
2207 	{
2208 		size_t sz = get_note_info_size(&info);
2209 
2210 		sz += elf_coredump_extra_notes_size();
2211 
2212 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2213 		if (!phdr4note)
2214 			goto end_coredump;
2215 
2216 		fill_elf_note_phdr(phdr4note, sz, offset);
2217 		offset += sz;
2218 	}
2219 
2220 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2221 
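	/*
	 * One file-size slot per dumped vma; segs - 1 (everything except the
	 * note segment) is an upper bound.  Guard the vmalloc() length
	 * computation against overflow first.
	 */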
2222 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2223 		goto end_coredump;
2224 	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2225 	if (!vma_filesz)
2226 		goto end_coredump;
2227 
2228 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2229 			vma = next_vma(vma, gate_vma)) {
2230 		unsigned long dump_size;
2231 
2232 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2233 		vma_filesz[i++] = dump_size;
2234 		vma_data_size += dump_size;
2235 	}
2236 
2237 	offset += vma_data_size;
2238 	offset += elf_core_extra_data_size();
2239 	e_shoff = offset;
2240 
2241 	if (e_phnum == PN_XNUM) {
2242 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2243 		if (!shdr4extnum)
2244 			goto end_coredump;
2245 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2246 	}
2247 
2248 	offset = dataoff;
2249 
2250 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2251 		goto end_coredump;
2252 
2253 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2254 		goto end_coredump;
2255 
2256 	/* Write program headers for segments dump */
2257 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2258 			vma = next_vma(vma, gate_vma)) {
2259 		struct elf_phdr phdr;
2260 
2261 		phdr.p_type = PT_LOAD;
2262 		phdr.p_offset = offset;
2263 		phdr.p_vaddr = vma->vm_start;
2264 		phdr.p_paddr = 0;
2265 		phdr.p_filesz = vma_filesz[i++];
2266 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2267 		offset += phdr.p_filesz;
2268 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2269 		if (vma->vm_flags & VM_WRITE)
2270 			phdr.p_flags |= PF_W;
2271 		if (vma->vm_flags & VM_EXEC)
2272 			phdr.p_flags |= PF_X;
2273 		phdr.p_align = ELF_EXEC_PAGESIZE;
2274 
2275 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2276 			goto end_coredump;
2277 	}
2278 
2279 	if (!elf_core_write_extra_phdrs(cprm, offset))
2280 		goto end_coredump;
2281 
2282  	/* write out the notes section */
2283 	if (!write_note_info(&info, cprm))
2284 		goto end_coredump;
2285 
2286 	if (elf_coredump_extra_notes_write(cprm))
2287 		goto end_coredump;
2288 
2289 	/* Align to page */
2290 	if (!dump_skip(cprm, dataoff - cprm->pos))
2291 		goto end_coredump;
2292 
2293 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2294 			vma = next_vma(vma, gate_vma)) {
2295 		unsigned long addr;
2296 		unsigned long end;
2297 
2298 		end = vma->vm_start + vma_filesz[i++];
2299 
2300 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2301 			struct page *page;
2302 			int stop;
2303 
2304 			page = get_dump_page(addr);
2305 			if (page) {
2306 				void *kaddr = kmap(page);
2307 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2308 				kunmap(page);
2309 				put_page(page);
2310 			} else
2311 				stop = !dump_skip(cprm, PAGE_SIZE);
2312 			if (stop)
2313 				goto end_coredump;
2314 		}
2315 	}
2316 	dump_truncate(cprm);
2317 
2318 	if (!elf_core_write_extra_data(cprm))
2319 		goto end_coredump;
2320 
2321 	if (e_phnum == PN_XNUM) {
2322 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2323 			goto end_coredump;
2324 	}
2325 
2326 end_coredump:
2327 	set_fs(fs);
2328 
2329 cleanup:
2330 	free_note_info(&info);
2331 	kfree(shdr4extnum);
2332 	vfree(vma_filesz);
2333 	kfree(phdr4note);
2334 	kfree(elf);
2335 out:
2336 	return has_dumped;
2337 }
2338 
2339 #endif		/* CONFIG_ELF_CORE */
2340 
2341 static int __init init_elf_binfmt(void)
2342 {
2343 	register_binfmt(&elf_format);
2344 	return 0;
2345 }
2346 
2347 static void __exit exit_elf_binfmt(void)
2348 {
2349 	/* Remove the ELF loader. */
2350 	unregister_binfmt(&elf_format);
2351 }
2352 
2353 core_initcall(init_elf_binfmt);
2354 module_exit(exit_elf_binfmt);
2355 MODULE_LICENSE("GPL");
2356