xref: /openbmc/linux/fs/binfmt_elf.c (revision 4161b450)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40 
41 #ifndef user_long_t
42 #define user_long_t long
43 #endif
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
46 #endif
47 
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
50 				int, int, unsigned long);
51 
52 #ifdef CONFIG_USELIB
53 static int load_elf_library(struct file *);
54 #else
55 #define load_elf_library NULL
56 #endif
57 
58 /*
59  * If we don't support core dumping, then supply a NULL so we
60  * don't even try.
61  */
62 #ifdef CONFIG_ELF_CORE
63 static int elf_core_dump(struct coredump_params *cprm);
64 #else
65 #define elf_core_dump	NULL
66 #endif
67 
68 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
69 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
70 #else
71 #define ELF_MIN_ALIGN	PAGE_SIZE
72 #endif
73 
74 #ifndef ELF_CORE_EFLAGS
75 #define ELF_CORE_EFLAGS	0
76 #endif
77 
78 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
79 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
80 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
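/*
 * For illustration, assuming ELF_MIN_ALIGN == 4096 (0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within that page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next boundary)
 */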
81 
82 static struct linux_binfmt elf_format = {
83 	.module		= THIS_MODULE,
84 	.load_binary	= load_elf_binary,
85 	.load_shlib	= load_elf_library,
86 	.core_dump	= elf_core_dump,
87 	.min_coredump	= ELF_EXEC_PAGESIZE,
88 };
89 
90 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
91 
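/*
 * Anonymously map the page-aligned [start, end) range for the brk/bss area
 * and record the new program break in the mm.  Returns 0 on success, or the
 * error value from vm_brk() on failure.
 */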
92 static int set_brk(unsigned long start, unsigned long end)
93 {
94 	start = ELF_PAGEALIGN(start);
95 	end = ELF_PAGEALIGN(end);
96 	if (end > start) {
97 		unsigned long addr;
98 		addr = vm_brk(start, end - start);
99 		if (BAD_ADDR(addr))
100 			return addr;
101 	}
102 	current->mm->start_brk = current->mm->brk = end;
103 	return 0;
104 }
105 
106 /* We need to explicitly zero any fractional pages
107    after the data section (i.e. bss).  These would
108    otherwise contain junk from the file that should
109    not be in memory.
110  */
111 static int padzero(unsigned long elf_bss)
112 {
113 	unsigned long nbyte;
114 
115 	nbyte = ELF_PAGEOFFSET(elf_bss);
116 	if (nbyte) {
117 		nbyte = ELF_MIN_ALIGN - nbyte;
118 		if (clear_user((void __user *) elf_bss, nbyte))
119 			return -EFAULT;
120 	}
121 	return 0;
122 }
123 
124 /* Let's use some macros to make this stack manipulation a little clearer */
125 #ifdef CONFIG_STACK_GROWSUP
126 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
127 #define STACK_ROUND(sp, items) \
128 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
129 #define STACK_ALLOC(sp, len) ({ \
130 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
131 	old_sp; })
132 #else
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
134 #define STACK_ROUND(sp, items) \
135 	(((unsigned long) (sp - items)) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
137 #endif
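/*
 * For the usual downward-growing stack: STACK_ALLOC() moves 'sp' down by
 * 'len' bytes and returns the new (lower) address, STACK_ADD() steps 'sp'
 * down by 'items' elf_addr_t slots, and STACK_ROUND() does the same step
 * and then rounds the result down to a 16-byte boundary.  The
 * CONFIG_STACK_GROWSUP variants perform the mirror-image operations.
 */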
138 
139 #ifndef ELF_BASE_PLATFORM
140 /*
141  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
142  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
143  * will be copied to the user stack in the same manner as AT_PLATFORM.
144  */
145 #define ELF_BASE_PLATFORM NULL
146 #endif
147 
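/*
 * create_elf_tables() lays out the initial userspace stack for the new
 * program.  For a downward-growing stack the result is, from low to high
 * addresses:
 *
 *   argc, argv[0..argc-1], NULL, envp[0..envc-1], NULL,
 *   auxv (id/value pairs terminated by AT_NULL),
 *   ... argument/environment strings, random bytes, platform strings ...
 *
 * The auxv entries are also kept in mm->saved_auxv so they can be emitted
 * later in core dumps (see fill_auxv_note()).
 */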
148 static int
149 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
150 		unsigned long load_addr, unsigned long interp_load_addr)
151 {
152 	unsigned long p = bprm->p;
153 	int argc = bprm->argc;
154 	int envc = bprm->envc;
155 	elf_addr_t __user *argv;
156 	elf_addr_t __user *envp;
157 	elf_addr_t __user *sp;
158 	elf_addr_t __user *u_platform;
159 	elf_addr_t __user *u_base_platform;
160 	elf_addr_t __user *u_rand_bytes;
161 	const char *k_platform = ELF_PLATFORM;
162 	const char *k_base_platform = ELF_BASE_PLATFORM;
163 	unsigned char k_rand_bytes[16];
164 	int items;
165 	elf_addr_t *elf_info;
166 	int ei_index = 0;
167 	const struct cred *cred = current_cred();
168 	struct vm_area_struct *vma;
169 
170 	/*
171 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
172 	 * evictions by the processes running on the same package. One
173 	 * thing we can do is to shuffle the initial stack for them.
174 	 */
175 
176 	p = arch_align_stack(p);
177 
178 	/*
179 	 * If this architecture has a platform capability string, copy it
180 	 * to userspace.  In some cases (Sparc), this info is impossible
181 	 * for userspace to get any other way, in others (i386) it is
182 	 * merely difficult.
183 	 */
184 	u_platform = NULL;
185 	if (k_platform) {
186 		size_t len = strlen(k_platform) + 1;
187 
188 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189 		if (__copy_to_user(u_platform, k_platform, len))
190 			return -EFAULT;
191 	}
192 
193 	/*
194 	 * If this architecture has a "base" platform capability
195 	 * string, copy it to userspace.
196 	 */
197 	u_base_platform = NULL;
198 	if (k_base_platform) {
199 		size_t len = strlen(k_base_platform) + 1;
200 
201 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 		if (__copy_to_user(u_base_platform, k_base_platform, len))
203 			return -EFAULT;
204 	}
205 
206 	/*
207 	 * Generate 16 random bytes for userspace PRNG seeding.
208 	 */
209 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
210 	u_rand_bytes = (elf_addr_t __user *)
211 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
212 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
213 		return -EFAULT;
214 
215 	/* Create the ELF interpreter info */
216 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
217 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
218 #define NEW_AUX_ENT(id, val) \
219 	do { \
220 		elf_info[ei_index++] = id; \
221 		elf_info[ei_index++] = val; \
222 	} while (0)
223 
224 #ifdef ARCH_DLINFO
225 	/*
226 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
227 	 * AUXV.
228 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
229 	 * ARCH_DLINFO changes
230 	 */
231 	ARCH_DLINFO;
232 #endif
233 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
234 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
235 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
236 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
237 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
238 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
239 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
240 	NEW_AUX_ENT(AT_FLAGS, 0);
241 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
242 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
243 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
244 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
245 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
246 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
247 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
248 #ifdef ELF_HWCAP2
249 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
250 #endif
251 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
252 	if (k_platform) {
253 		NEW_AUX_ENT(AT_PLATFORM,
254 			    (elf_addr_t)(unsigned long)u_platform);
255 	}
256 	if (k_base_platform) {
257 		NEW_AUX_ENT(AT_BASE_PLATFORM,
258 			    (elf_addr_t)(unsigned long)u_base_platform);
259 	}
260 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
261 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
262 	}
263 #undef NEW_AUX_ENT
264 	/* AT_NULL is zero; clear the rest too */
265 	memset(&elf_info[ei_index], 0,
266 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
267 
268 	/* And advance past the AT_NULL entry.  */
269 	ei_index += 2;
270 
271 	sp = STACK_ADD(p, ei_index);
272 
273 	items = (argc + 1) + (envc + 1) + 1;
274 	bprm->p = STACK_ROUND(sp, items);
275 
276 	/* Point sp at the lowest address on the stack */
277 #ifdef CONFIG_STACK_GROWSUP
278 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
279 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
280 #else
281 	sp = (elf_addr_t __user *)bprm->p;
282 #endif
283 
284 
285 	/*
286 	 * Grow the stack manually; some architectures have a limit on how
287 	 * far ahead a user-space access may be in order to grow the stack.
288 	 */
289 	vma = find_extend_vma(current->mm, bprm->p);
290 	if (!vma)
291 		return -EFAULT;
292 
293 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
294 	if (__put_user(argc, sp++))
295 		return -EFAULT;
296 	argv = sp;
297 	envp = argv + argc + 1;
298 
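	/*
	 * The argument and environment strings themselves were already copied
	 * to the top of the new stack by the exec core (copy_strings()); the
	 * loops below only write the argv[]/envp[] pointer arrays that point
	 * into those strings.
	 */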
299 	/* Populate argv and envp */
300 	p = current->mm->arg_end = current->mm->arg_start;
301 	while (argc-- > 0) {
302 		size_t len;
303 		if (__put_user((elf_addr_t)p, argv++))
304 			return -EFAULT;
305 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306 		if (!len || len > MAX_ARG_STRLEN)
307 			return -EINVAL;
308 		p += len;
309 	}
310 	if (__put_user(0, argv))
311 		return -EFAULT;
312 	current->mm->arg_end = current->mm->env_start = p;
313 	while (envc-- > 0) {
314 		size_t len;
315 		if (__put_user((elf_addr_t)p, envp++))
316 			return -EFAULT;
317 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
318 		if (!len || len > MAX_ARG_STRLEN)
319 			return -EINVAL;
320 		p += len;
321 	}
322 	if (__put_user(0, envp))
323 		return -EFAULT;
324 	current->mm->env_end = p;
325 
326 	/* Put the elf_info on the stack in the right place.  */
327 	sp = (elf_addr_t __user *)envp + 1;
328 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
329 		return -EFAULT;
330 	return 0;
331 }
332 
333 #ifndef elf_map
334 
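/*
 * Map one PT_LOAD segment, mmap()ing from the ELF_MIN_ALIGN-aligned file
 * offset/address that contains it.  'total_size', when non-zero, is the
 * full span of all PT_LOAD segments; it is used only for the first mapping
 * so that the whole image is reserved in one go (see the comment below).
 */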
335 static unsigned long elf_map(struct file *filep, unsigned long addr,
336 		struct elf_phdr *eppnt, int prot, int type,
337 		unsigned long total_size)
338 {
339 	unsigned long map_addr;
340 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
341 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
342 	addr = ELF_PAGESTART(addr);
343 	size = ELF_PAGEALIGN(size);
344 
345 	/* mmap() will return -EINVAL if given a zero size, but a
346 	 * segment with zero filesize is perfectly valid */
347 	if (!size)
348 		return addr;
349 
350 	/*
351 	 * total_size is the size of the ELF (interpreter) image.
352 	 * The _first_ mmap needs to know the full size; otherwise
353 	 * randomization might put this image into a position that
354 	 * overlaps the ELF binary image (since size < total_size).
355 	 * So we first map the 'big' image and then unmap the remainder
356 	 * at the end (the unmap is needed for ELF images with holes).
357 	 */
358 	if (total_size) {
359 		total_size = ELF_PAGEALIGN(total_size);
360 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
361 		if (!BAD_ADDR(map_addr))
362 			vm_munmap(map_addr+size, total_size-size);
363 	} else
364 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
365 
366 	return(map_addr);
367 }
368 
369 #endif /* !elf_map */
370 
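/*
 * Size of the address range covered by all PT_LOAD segments together: from
 * the page-aligned start of the first PT_LOAD to the end of the last one's
 * memory image (p_vaddr + p_memsz), or 0 if there is no PT_LOAD at all.
 */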
371 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
372 {
373 	int i, first_idx = -1, last_idx = -1;
374 
375 	for (i = 0; i < nr; i++) {
376 		if (cmds[i].p_type == PT_LOAD) {
377 			last_idx = i;
378 			if (first_idx == -1)
379 				first_idx = i;
380 		}
381 	}
382 	if (first_idx == -1)
383 		return 0;
384 
385 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
386 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
387 }
388 
389 /**
390  * load_elf_phdrs() - load ELF program headers
391  * @elf_ex:   ELF header of the binary whose program headers should be loaded
392  * @elf_file: the opened ELF binary file
393  *
394  * Loads ELF program headers from the binary file elf_file, which has the ELF
395  * header pointed to by elf_ex, into a newly allocated array. The caller is
396  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
397  */
398 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
399 				       struct file *elf_file)
400 {
401 	struct elf_phdr *elf_phdata = NULL;
402 	int retval, size, err = -1;
403 
404 	/*
405 	 * If the size of this structure has changed, then punt, since
406 	 * we will be doing the wrong thing.
407 	 */
408 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
409 		goto out;
410 
411 	/* Sanity check the number of program headers... */
412 	if (elf_ex->e_phnum < 1 ||
413 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
414 		goto out;
415 
416 	/* ...and their total size. */
417 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
418 	if (size > ELF_MIN_ALIGN)
419 		goto out;
420 
421 	elf_phdata = kmalloc(size, GFP_KERNEL);
422 	if (!elf_phdata)
423 		goto out;
424 
425 	/* Read in the program headers */
426 	retval = kernel_read(elf_file, elf_ex->e_phoff,
427 			     (char *)elf_phdata, size);
428 	if (retval != size) {
429 		err = (retval < 0) ? retval : -EIO;
430 		goto out;
431 	}
432 
433 	/* Success! */
434 	err = 0;
435 out:
436 	if (err) {
437 		kfree(elf_phdata);
438 		elf_phdata = NULL;
439 	}
440 	return elf_phdata;
441 }
442 
443 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
444 
445 /**
446  * struct arch_elf_state - arch-specific ELF loading state
447  *
448  * This structure is used to preserve architecture specific data during
449  * the loading of an ELF file, throughout the checking of architecture
450  * specific ELF headers & through to the point where the ELF load is
451  * known to be proceeding (ie. SET_PERSONALITY).
452  *
453  * This implementation is a dummy for architectures which require no
454  * specific state.
455  */
456 struct arch_elf_state {
457 };
458 
459 #define INIT_ARCH_ELF_STATE {}
460 
461 /**
462  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
463  * @ehdr:	The main ELF header
464  * @phdr:	The program header to check
465  * @elf:	The open ELF file
466  * @is_interp:	True if the phdr is from the interpreter of the ELF being
467  *		loaded, else false.
468  * @state:	Architecture-specific state preserved throughout the process
469  *		of loading the ELF.
470  *
471  * Inspects the program header phdr to validate its correctness and/or
472  * suitability for the system. Called once per ELF program header in the
473  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
474  * interpreter.
475  *
476  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
477  *         with that return code.
478  */
479 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
480 				   struct elf_phdr *phdr,
481 				   struct file *elf, bool is_interp,
482 				   struct arch_elf_state *state)
483 {
484 	/* Dummy implementation, always proceed */
485 	return 0;
486 }
487 
488 /**
489  * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
490  * @ehdr:	The main ELF header
491  * @has_interp:	True if the ELF has an interpreter, else false.
492  * @state:	Architecture-specific state preserved throughout the process
493  *		of loading the ELF.
494  *
495  * Provides a final opportunity for architecture code to reject the loading
496  * of the ELF & cause an exec syscall to return an error. This is called after
497  * all program headers to be checked by arch_elf_pt_proc have been.
498  *
499  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
500  *         with that return code.
501  */
502 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
503 				 struct arch_elf_state *state)
504 {
505 	/* Dummy implementation, always proceed */
506 	return 0;
507 }
508 
509 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
510 
511 /* This is much more generalized than the library routine read function,
512    so we keep this separate.  Technically the library read function
513    is only provided so that we can read a.out libraries that have
514    an ELF header */
515 
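/*
 * Map the ELF interpreter (typically the runtime dynamic linker) into the
 * current address space.  On success the return value is the interpreter's
 * load address (used as the relocation bias and reported via AT_BASE); on
 * failure it is an error value for which BAD_ADDR()/IS_ERR() is true.
 */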
516 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
517 		struct file *interpreter, unsigned long *interp_map_addr,
518 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
519 {
520 	struct elf_phdr *eppnt;
521 	unsigned long load_addr = 0;
522 	int load_addr_set = 0;
523 	unsigned long last_bss = 0, elf_bss = 0;
524 	unsigned long error = ~0UL;
525 	unsigned long total_size;
526 	int i;
527 
528 	/* First of all, some simple consistency checks */
529 	if (interp_elf_ex->e_type != ET_EXEC &&
530 	    interp_elf_ex->e_type != ET_DYN)
531 		goto out;
532 	if (!elf_check_arch(interp_elf_ex))
533 		goto out;
534 	if (!interpreter->f_op->mmap)
535 		goto out;
536 
537 	total_size = total_mapping_size(interp_elf_phdata,
538 					interp_elf_ex->e_phnum);
539 	if (!total_size) {
540 		error = -EINVAL;
541 		goto out;
542 	}
543 
544 	eppnt = interp_elf_phdata;
545 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
546 		if (eppnt->p_type == PT_LOAD) {
547 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
548 			int elf_prot = 0;
549 			unsigned long vaddr = 0;
550 			unsigned long k, map_addr;
551 
552 			if (eppnt->p_flags & PF_R)
553 				elf_prot = PROT_READ;
554 			if (eppnt->p_flags & PF_W)
555 				elf_prot |= PROT_WRITE;
556 			if (eppnt->p_flags & PF_X)
557 				elf_prot |= PROT_EXEC;
558 			vaddr = eppnt->p_vaddr;
559 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
560 				elf_type |= MAP_FIXED;
561 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
562 				load_addr = -vaddr;
563 
564 			map_addr = elf_map(interpreter, load_addr + vaddr,
565 					eppnt, elf_prot, elf_type, total_size);
566 			total_size = 0;
567 			if (!*interp_map_addr)
568 				*interp_map_addr = map_addr;
569 			error = map_addr;
570 			if (BAD_ADDR(map_addr))
571 				goto out;
572 
573 			if (!load_addr_set &&
574 			    interp_elf_ex->e_type == ET_DYN) {
575 				load_addr = map_addr - ELF_PAGESTART(vaddr);
576 				load_addr_set = 1;
577 			}
578 
579 			/*
580 			 * Check to see if the section's size will overflow the
581 			 * allowed task size. Note that p_filesz must always be
582 			 * <= p_memsz so it's only necessary to check p_memsz.
583 			 */
584 			k = load_addr + eppnt->p_vaddr;
585 			if (BAD_ADDR(k) ||
586 			    eppnt->p_filesz > eppnt->p_memsz ||
587 			    eppnt->p_memsz > TASK_SIZE ||
588 			    TASK_SIZE - eppnt->p_memsz < k) {
589 				error = -ENOMEM;
590 				goto out;
591 			}
592 
593 			/*
594 			 * Find the end of the file mapping for this phdr, and
595 			 * keep track of the largest address we see for this.
596 			 */
597 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
598 			if (k > elf_bss)
599 				elf_bss = k;
600 
601 			/*
602 			 * Do the same thing for the memory mapping - between
603 			 * elf_bss and last_bss is the bss section.
604 			 */
605 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
606 			if (k > last_bss)
607 				last_bss = k;
608 		}
609 	}
610 
611 	if (last_bss > elf_bss) {
612 		/*
613 		 * Now fill out the bss section.  First pad the last page up
614 		 * to the page boundary, and then perform a mmap to make sure
615 		 * that there are zero-mapped pages up to and including the
616 		 * last bss page.
617 		 */
618 		if (padzero(elf_bss)) {
619 			error = -EFAULT;
620 			goto out;
621 		}
622 
623 		/* What we have mapped so far */
624 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
625 
626 		/* Map the last of the bss segment */
627 		error = vm_brk(elf_bss, last_bss - elf_bss);
628 		if (BAD_ADDR(error))
629 			goto out;
630 	}
631 
632 	error = load_addr;
633 out:
634 	return error;
635 }
636 
637 /*
638  * These are the functions used to load ELF style executables and shared
639  * libraries.  There is no binary dependent code anywhere else.
640  */
641 
642 #ifndef STACK_RND_MASK
643 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
644 #endif
645 
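/*
 * Pick a randomized stack top.  With 4 KiB pages the default STACK_RND_MASK
 * of 0x7ff gives a randomization range of 0x7ff << PAGE_SHIFT bytes, i.e.
 * roughly 8 MiB of virtual address space (as the comment above notes).
 */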
646 static unsigned long randomize_stack_top(unsigned long stack_top)
647 {
648 	unsigned int random_variable = 0;
649 
650 	if ((current->flags & PF_RANDOMIZE) &&
651 		!(current->personality & ADDR_NO_RANDOMIZE)) {
652 		random_variable = get_random_int() & STACK_RND_MASK;
653 		random_variable <<= PAGE_SHIFT;
654 	}
655 #ifdef CONFIG_STACK_GROWSUP
656 	return PAGE_ALIGN(stack_top) + random_variable;
657 #else
658 	return PAGE_ALIGN(stack_top) - random_variable;
659 #endif
660 }
661 
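/*
 * load_elf_binary() is the binfmt handler proper: it validates the ELF
 * header, reads the program headers, loads the PT_INTERP interpreter if one
 * is present, maps every PT_LOAD segment, sets up the bss/brk, builds the
 * argc/argv/envp/auxv tables on the stack, and finally starts the new
 * thread at the chosen entry point.
 */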
662 static int load_elf_binary(struct linux_binprm *bprm)
663 {
664 	struct file *interpreter = NULL; /* to shut gcc up */
665 	unsigned long load_addr = 0, load_bias = 0;
666 	int load_addr_set = 0;
667 	char * elf_interpreter = NULL;
668 	unsigned long error;
669 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
670 	unsigned long elf_bss, elf_brk;
671 	int retval, i;
672 	unsigned long elf_entry;
673 	unsigned long interp_load_addr = 0;
674 	unsigned long start_code, end_code, start_data, end_data;
675 	unsigned long reloc_func_desc __maybe_unused = 0;
676 	int executable_stack = EXSTACK_DEFAULT;
677 	struct pt_regs *regs = current_pt_regs();
678 	struct {
679 		struct elfhdr elf_ex;
680 		struct elfhdr interp_elf_ex;
681 	} *loc;
682 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
683 
684 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
685 	if (!loc) {
686 		retval = -ENOMEM;
687 		goto out_ret;
688 	}
689 
690 	/* Get the exec-header */
691 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
692 
693 	retval = -ENOEXEC;
694 	/* First of all, some simple consistency checks */
695 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
696 		goto out;
697 
698 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
699 		goto out;
700 	if (!elf_check_arch(&loc->elf_ex))
701 		goto out;
702 	if (!bprm->file->f_op->mmap)
703 		goto out;
704 
705 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
706 	if (!elf_phdata)
707 		goto out;
708 
709 	elf_ppnt = elf_phdata;
710 	elf_bss = 0;
711 	elf_brk = 0;
712 
713 	start_code = ~0UL;
714 	end_code = 0;
715 	start_data = 0;
716 	end_data = 0;
717 
718 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
719 		if (elf_ppnt->p_type == PT_INTERP) {
720 			/* This is the program interpreter used for
721 			 * shared libraries - for now assume that this
722 			 * is an a.out format binary
723 			 */
724 			retval = -ENOEXEC;
725 			if (elf_ppnt->p_filesz > PATH_MAX ||
726 			    elf_ppnt->p_filesz < 2)
727 				goto out_free_ph;
728 
729 			retval = -ENOMEM;
730 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
731 						  GFP_KERNEL);
732 			if (!elf_interpreter)
733 				goto out_free_ph;
734 
735 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
736 					     elf_interpreter,
737 					     elf_ppnt->p_filesz);
738 			if (retval != elf_ppnt->p_filesz) {
739 				if (retval >= 0)
740 					retval = -EIO;
741 				goto out_free_interp;
742 			}
743 			/* make sure path is NULL terminated */
744 			retval = -ENOEXEC;
745 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
746 				goto out_free_interp;
747 
748 			interpreter = open_exec(elf_interpreter);
749 			retval = PTR_ERR(interpreter);
750 			if (IS_ERR(interpreter))
751 				goto out_free_interp;
752 
753 			/*
754 			 * If the binary is not readable then enforce
755 			 * mm->dumpable = 0 regardless of the interpreter's
756 			 * permissions.
757 			 */
758 			would_dump(bprm, interpreter);
759 
760 			retval = kernel_read(interpreter, 0, bprm->buf,
761 					     BINPRM_BUF_SIZE);
762 			if (retval != BINPRM_BUF_SIZE) {
763 				if (retval >= 0)
764 					retval = -EIO;
765 				goto out_free_dentry;
766 			}
767 
768 			/* Get the exec headers */
769 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
770 			break;
771 		}
772 		elf_ppnt++;
773 	}
774 
775 	elf_ppnt = elf_phdata;
776 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
777 		switch (elf_ppnt->p_type) {
778 		case PT_GNU_STACK:
779 			if (elf_ppnt->p_flags & PF_X)
780 				executable_stack = EXSTACK_ENABLE_X;
781 			else
782 				executable_stack = EXSTACK_DISABLE_X;
783 			break;
784 
785 		case PT_LOPROC ... PT_HIPROC:
786 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
787 						  bprm->file, false,
788 						  &arch_state);
789 			if (retval)
790 				goto out_free_dentry;
791 			break;
792 		}
793 
794 	/* Some simple consistency checks for the interpreter */
795 	if (elf_interpreter) {
796 		retval = -ELIBBAD;
797 		/* Not an ELF interpreter */
798 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
799 			goto out_free_dentry;
800 		/* Verify the interpreter has a valid arch */
801 		if (!elf_check_arch(&loc->interp_elf_ex))
802 			goto out_free_dentry;
803 
804 		/* Load the interpreter program headers */
805 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
806 						   interpreter);
807 		if (!interp_elf_phdata)
808 			goto out_free_dentry;
809 
810 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
811 		elf_ppnt = interp_elf_phdata;
812 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
813 			switch (elf_ppnt->p_type) {
814 			case PT_LOPROC ... PT_HIPROC:
815 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
816 							  elf_ppnt, interpreter,
817 							  true, &arch_state);
818 				if (retval)
819 					goto out_free_dentry;
820 				break;
821 			}
822 	}
823 
824 	/*
825 	 * Allow arch code to reject the ELF at this point, whilst it's
826 	 * still possible to return an error to the code that invoked
827 	 * the exec syscall.
828 	 */
829 	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
830 	if (retval)
831 		goto out_free_dentry;
832 
833 	/* Flush all traces of the currently running executable */
834 	retval = flush_old_exec(bprm);
835 	if (retval)
836 		goto out_free_dentry;
837 
838 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
839 	   may depend on the personality.  */
840 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
841 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
842 		current->personality |= READ_IMPLIES_EXEC;
843 
844 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
845 		current->flags |= PF_RANDOMIZE;
846 
847 	setup_new_exec(bprm);
848 
849 	/* Do this so that we can load the interpreter, if need be.  We will
850 	   change some of these later */
851 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
852 				 executable_stack);
853 	if (retval < 0)
854 		goto out_free_dentry;
855 
856 	current->mm->start_stack = bprm->p;
857 
858 	/* Now we do a little grungy work by mmapping the ELF image into
859 	   the correct location in memory. */
860 	for(i = 0, elf_ppnt = elf_phdata;
861 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
862 		int elf_prot = 0, elf_flags;
863 		unsigned long k, vaddr;
864 
865 		if (elf_ppnt->p_type != PT_LOAD)
866 			continue;
867 
868 		if (unlikely (elf_brk > elf_bss)) {
869 			unsigned long nbyte;
870 
871 			/* There was a PT_LOAD segment with p_memsz > p_filesz
872 			   before this one. Map anonymous pages, if needed,
873 			   and clear the area.  */
874 			retval = set_brk(elf_bss + load_bias,
875 					 elf_brk + load_bias);
876 			if (retval)
877 				goto out_free_dentry;
878 			nbyte = ELF_PAGEOFFSET(elf_bss);
879 			if (nbyte) {
880 				nbyte = ELF_MIN_ALIGN - nbyte;
881 				if (nbyte > elf_brk - elf_bss)
882 					nbyte = elf_brk - elf_bss;
883 				if (clear_user((void __user *)elf_bss +
884 							load_bias, nbyte)) {
885 					/*
886 					 * This bss-zeroing can fail if the ELF
887 					 * file specifies odd protections. So
888 					 * we don't check the return value
889 					 */
890 				}
891 			}
892 		}
893 
894 		if (elf_ppnt->p_flags & PF_R)
895 			elf_prot |= PROT_READ;
896 		if (elf_ppnt->p_flags & PF_W)
897 			elf_prot |= PROT_WRITE;
898 		if (elf_ppnt->p_flags & PF_X)
899 			elf_prot |= PROT_EXEC;
900 
901 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
902 
903 		vaddr = elf_ppnt->p_vaddr;
904 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
905 			elf_flags |= MAP_FIXED;
906 		} else if (loc->elf_ex.e_type == ET_DYN) {
907 			/* Try and get dynamic programs out of the way of the
908 			 * default mmap base, as well as whatever program they
909 			 * might try to exec.  This is because the brk will
910 			 * follow the loader, and is not movable.  */
911 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
912 			/* Memory randomization might have been switched off
913 			 * at runtime via sysctl or via an explicit
914 			 * personality flag.
915 			 * If that is the case, use the fixed non-zero
916 			 * load_bias below in order to establish proper
917 			 * non-randomized mappings.
918 			 */
919 			if (current->flags & PF_RANDOMIZE)
920 				load_bias = 0;
921 			else
922 				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
923 #else
924 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
925 #endif
926 		}
927 
928 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
929 				elf_prot, elf_flags, 0);
930 		if (BAD_ADDR(error)) {
931 			retval = IS_ERR((void *)error) ?
932 				PTR_ERR((void*)error) : -EINVAL;
933 			goto out_free_dentry;
934 		}
935 
936 		if (!load_addr_set) {
937 			load_addr_set = 1;
938 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
939 			if (loc->elf_ex.e_type == ET_DYN) {
940 				load_bias += error -
941 				             ELF_PAGESTART(load_bias + vaddr);
942 				load_addr += load_bias;
943 				reloc_func_desc = load_bias;
944 			}
945 		}
946 		k = elf_ppnt->p_vaddr;
947 		if (k < start_code)
948 			start_code = k;
949 		if (start_data < k)
950 			start_data = k;
951 
952 		/*
953 		 * Check to see if the section's size will overflow the
954 		 * allowed task size. Note that p_filesz must always be
955 		 * <= p_memsz so it is only necessary to check p_memsz.
956 		 */
957 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
958 		    elf_ppnt->p_memsz > TASK_SIZE ||
959 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
960 			/* set_brk can never work. Avoid overflows. */
961 			retval = -EINVAL;
962 			goto out_free_dentry;
963 		}
964 
965 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
966 
967 		if (k > elf_bss)
968 			elf_bss = k;
969 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
970 			end_code = k;
971 		if (end_data < k)
972 			end_data = k;
973 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
974 		if (k > elf_brk)
975 			elf_brk = k;
976 	}
977 
978 	loc->elf_ex.e_entry += load_bias;
979 	elf_bss += load_bias;
980 	elf_brk += load_bias;
981 	start_code += load_bias;
982 	end_code += load_bias;
983 	start_data += load_bias;
984 	end_data += load_bias;
985 
986 	/* Calling set_brk effectively mmaps the pages that we need
987 	 * for the bss and break sections.  We must do this before
988 	 * mapping in the interpreter, to make sure it doesn't wind
989 	 * up getting placed where the bss needs to go.
990 	 */
991 	retval = set_brk(elf_bss, elf_brk);
992 	if (retval)
993 		goto out_free_dentry;
994 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
995 		retval = -EFAULT; /* Nobody gets to see this, but.. */
996 		goto out_free_dentry;
997 	}
998 
999 	if (elf_interpreter) {
1000 		unsigned long interp_map_addr = 0;
1001 
1002 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1003 					    interpreter,
1004 					    &interp_map_addr,
1005 					    load_bias, interp_elf_phdata);
1006 		if (!IS_ERR((void *)elf_entry)) {
1007 			/*
1008 			 * load_elf_interp() returns relocation
1009 			 * adjustment
1010 			 */
1011 			interp_load_addr = elf_entry;
1012 			elf_entry += loc->interp_elf_ex.e_entry;
1013 		}
1014 		if (BAD_ADDR(elf_entry)) {
1015 			retval = IS_ERR((void *)elf_entry) ?
1016 					(int)elf_entry : -EINVAL;
1017 			goto out_free_dentry;
1018 		}
1019 		reloc_func_desc = interp_load_addr;
1020 
1021 		allow_write_access(interpreter);
1022 		fput(interpreter);
1023 		kfree(elf_interpreter);
1024 	} else {
1025 		elf_entry = loc->elf_ex.e_entry;
1026 		if (BAD_ADDR(elf_entry)) {
1027 			retval = -EINVAL;
1028 			goto out_free_dentry;
1029 		}
1030 	}
1031 
1032 	kfree(interp_elf_phdata);
1033 	kfree(elf_phdata);
1034 
1035 	set_binfmt(&elf_format);
1036 
1037 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1038 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1039 	if (retval < 0)
1040 		goto out;
1041 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1042 
1043 	install_exec_creds(bprm);
1044 	retval = create_elf_tables(bprm, &loc->elf_ex,
1045 			  load_addr, interp_load_addr);
1046 	if (retval < 0)
1047 		goto out;
1048 	/* N.B. passed_fileno might not be initialized? */
1049 	current->mm->end_code = end_code;
1050 	current->mm->start_code = start_code;
1051 	current->mm->start_data = start_data;
1052 	current->mm->end_data = end_data;
1053 	current->mm->start_stack = bprm->p;
1054 
1055 #ifdef arch_randomize_brk
1056 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1057 		current->mm->brk = current->mm->start_brk =
1058 			arch_randomize_brk(current->mm);
1059 #ifdef CONFIG_COMPAT_BRK
1060 		current->brk_randomized = 1;
1061 #endif
1062 	}
1063 #endif
1064 
1065 	if (current->personality & MMAP_PAGE_ZERO) {
1066 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1067 		   and some applications "depend" upon this behavior.
1068 		   Since we do not have the power to recompile these, we
1069 		   emulate the SVr4 behavior. Sigh. */
1070 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1071 				MAP_FIXED | MAP_PRIVATE, 0);
1072 	}
1073 
1074 #ifdef ELF_PLAT_INIT
1075 	/*
1076 	 * The ABI may specify that certain registers be set up in special
1077 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1078 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1079 	 * that the e_entry field is the address of the function descriptor
1080 	 * for the startup routine, rather than the address of the startup
1081 	 * routine itself.  This macro performs whatever initialization to
1082 	 * the regs structure is required as well as any relocations to the
1083 	 * function descriptor entries when executing dynamically linked apps.
1084 	 */
1085 	ELF_PLAT_INIT(regs, reloc_func_desc);
1086 #endif
1087 
1088 	start_thread(regs, elf_entry, bprm->p);
1089 	retval = 0;
1090 out:
1091 	kfree(loc);
1092 out_ret:
1093 	return retval;
1094 
1095 	/* error cleanup */
1096 out_free_dentry:
1097 	kfree(interp_elf_phdata);
1098 	allow_write_access(interpreter);
1099 	if (interpreter)
1100 		fput(interpreter);
1101 out_free_interp:
1102 	kfree(elf_interpreter);
1103 out_free_ph:
1104 	kfree(elf_phdata);
1105 	goto out;
1106 }
1107 
1108 #ifdef CONFIG_USELIB
1109 /* This is really simpleminded and specialized - we are loading an
1110    a.out library that is given an ELF header. */
1111 static int load_elf_library(struct file *file)
1112 {
1113 	struct elf_phdr *elf_phdata;
1114 	struct elf_phdr *eppnt;
1115 	unsigned long elf_bss, bss, len;
1116 	int retval, error, i, j;
1117 	struct elfhdr elf_ex;
1118 
1119 	error = -ENOEXEC;
1120 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1121 	if (retval != sizeof(elf_ex))
1122 		goto out;
1123 
1124 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1125 		goto out;
1126 
1127 	/* First of all, some simple consistency checks */
1128 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1129 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1130 		goto out;
1131 
1132 	/* Now read in all of the header information */
1133 
1134 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1135 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1136 
1137 	error = -ENOMEM;
1138 	elf_phdata = kmalloc(j, GFP_KERNEL);
1139 	if (!elf_phdata)
1140 		goto out;
1141 
1142 	eppnt = elf_phdata;
1143 	error = -ENOEXEC;
1144 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1145 	if (retval != j)
1146 		goto out_free_ph;
1147 
1148 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1149 		if ((eppnt + i)->p_type == PT_LOAD)
1150 			j++;
1151 	if (j != 1)
1152 		goto out_free_ph;
1153 
1154 	while (eppnt->p_type != PT_LOAD)
1155 		eppnt++;
1156 
1157 	/* Now use mmap to map the library into memory. */
1158 	error = vm_mmap(file,
1159 			ELF_PAGESTART(eppnt->p_vaddr),
1160 			(eppnt->p_filesz +
1161 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1162 			PROT_READ | PROT_WRITE | PROT_EXEC,
1163 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1164 			(eppnt->p_offset -
1165 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1166 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1167 		goto out_free_ph;
1168 
1169 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1170 	if (padzero(elf_bss)) {
1171 		error = -EFAULT;
1172 		goto out_free_ph;
1173 	}
1174 
1175 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1176 			    ELF_MIN_ALIGN - 1);
1177 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1178 	if (bss > len)
1179 		vm_brk(len, bss - len);
1180 	error = 0;
1181 
1182 out_free_ph:
1183 	kfree(elf_phdata);
1184 out:
1185 	return error;
1186 }
1187 #endif /* #ifdef CONFIG_USELIB */
1188 
1189 #ifdef CONFIG_ELF_CORE
1190 /*
1191  * ELF core dumper
1192  *
1193  * Modelled on fs/exec.c:aout_core_dump()
1194  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1195  */
1196 
1197 /*
1198  * The purpose of always_dump_vma() is to make sure that special kernel
1199  * mappings that are useful for post-mortem analysis are included in every
1200  * core dump.  That way the core dump is fully interpretable later without
1201  * having to match it against the same kernel and hardware config to see
1202  * what the PC values meant.  These special mappings include the vDSO,
1203  * vsyscall, and other architecture specific mappings.
1204  */
1205 static bool always_dump_vma(struct vm_area_struct *vma)
1206 {
1207 	/* Any vsyscall mappings? */
1208 	if (vma == get_gate_vma(vma->vm_mm))
1209 		return true;
1210 
1211 	/*
1212 	 * Assume that all vmas with a .name op should always be dumped.
1213 	 * If this changes, a new vm_ops field can easily be added.
1214 	 */
1215 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1216 		return true;
1217 
1218 	/*
1219 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1220 	 * such as vDSO sections.
1221 	 */
1222 	if (arch_vma_name(vma))
1223 		return true;
1224 
1225 	return false;
1226 }
1227 
1228 /*
1229  * Decide what to dump of a segment, part, all or none.
1230  */
1231 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1232 				   unsigned long mm_flags)
1233 {
1234 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
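/*
 * mm_flags carries the MMF_DUMP_* bits that userspace tunes through
 * /proc/<pid>/coredump_filter; FILTER() tests a single one of those bits.
 */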
1235 
1236 	/* always dump the vdso and vsyscall sections */
1237 	if (always_dump_vma(vma))
1238 		goto whole;
1239 
1240 	if (vma->vm_flags & VM_DONTDUMP)
1241 		return 0;
1242 
1243 	/* Hugetlb memory check */
1244 	if (vma->vm_flags & VM_HUGETLB) {
1245 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1246 			goto whole;
1247 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1248 			goto whole;
1249 		return 0;
1250 	}
1251 
1252 	/* Do not dump I/O mapped devices or special mappings */
1253 	if (vma->vm_flags & VM_IO)
1254 		return 0;
1255 
1256 	/* By default, dump shared memory if mapped from an anonymous file. */
1257 	if (vma->vm_flags & VM_SHARED) {
1258 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1259 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1260 			goto whole;
1261 		return 0;
1262 	}
1263 
1264 	/* Dump segments that have been written to.  */
1265 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1266 		goto whole;
1267 	if (vma->vm_file == NULL)
1268 		return 0;
1269 
1270 	if (FILTER(MAPPED_PRIVATE))
1271 		goto whole;
1272 
1273 	/*
1274 	 * If this looks like the beginning of a DSO or executable mapping,
1275 	 * check for an ELF header.  If we find one, dump the first page to
1276 	 * aid in determining what was mapped here.
1277 	 */
1278 	if (FILTER(ELF_HEADERS) &&
1279 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1280 		u32 __user *header = (u32 __user *) vma->vm_start;
1281 		u32 word;
1282 		mm_segment_t fs = get_fs();
1283 		/*
1284 		 * Doing it this way gets the constant folded by GCC.
1285 		 */
1286 		union {
1287 			u32 cmp;
1288 			char elfmag[SELFMAG];
1289 		} magic;
1290 		BUILD_BUG_ON(SELFMAG != sizeof word);
1291 		magic.elfmag[EI_MAG0] = ELFMAG0;
1292 		magic.elfmag[EI_MAG1] = ELFMAG1;
1293 		magic.elfmag[EI_MAG2] = ELFMAG2;
1294 		magic.elfmag[EI_MAG3] = ELFMAG3;
1295 		/*
1296 		 * Switch to the user "segment" for get_user(),
1297 		 * then put back what elf_core_dump() had in place.
1298 		 */
1299 		set_fs(USER_DS);
1300 		if (unlikely(get_user(word, header)))
1301 			word = 0;
1302 		set_fs(fs);
1303 		if (word == magic.cmp)
1304 			return PAGE_SIZE;
1305 	}
1306 
1307 #undef	FILTER
1308 
1309 	return 0;
1310 
1311 whole:
1312 	return vma->vm_end - vma->vm_start;
1313 }
1314 
1315 /* An ELF note in memory */
1316 struct memelfnote
1317 {
1318 	const char *name;
1319 	int type;
1320 	unsigned int datasz;
1321 	void *data;
1322 };
1323 
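/*
 * On-file size of a note: the fixed elf_note header followed by the name
 * and the descriptor data, each padded up to a 4-byte boundary.  For
 * example, a note named "CORE" (5 bytes including the NUL, padded to 8)
 * with a 20-byte descriptor occupies sizeof(struct elf_note) + 8 + 20 bytes.
 */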
1324 static int notesize(struct memelfnote *en)
1325 {
1326 	int sz;
1327 
1328 	sz = sizeof(struct elf_note);
1329 	sz += roundup(strlen(en->name) + 1, 4);
1330 	sz += roundup(en->datasz, 4);
1331 
1332 	return sz;
1333 }
1334 
1335 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1336 {
1337 	struct elf_note en;
1338 	en.n_namesz = strlen(men->name) + 1;
1339 	en.n_descsz = men->datasz;
1340 	en.n_type = men->type;
1341 
1342 	return dump_emit(cprm, &en, sizeof(en)) &&
1343 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1344 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1345 }
1346 
1347 static void fill_elf_header(struct elfhdr *elf, int segs,
1348 			    u16 machine, u32 flags)
1349 {
1350 	memset(elf, 0, sizeof(*elf));
1351 
1352 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1353 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1354 	elf->e_ident[EI_DATA] = ELF_DATA;
1355 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1356 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1357 
1358 	elf->e_type = ET_CORE;
1359 	elf->e_machine = machine;
1360 	elf->e_version = EV_CURRENT;
1361 	elf->e_phoff = sizeof(struct elfhdr);
1362 	elf->e_flags = flags;
1363 	elf->e_ehsize = sizeof(struct elfhdr);
1364 	elf->e_phentsize = sizeof(struct elf_phdr);
1365 	elf->e_phnum = segs;
1366 
1367 	return;
1368 }
1369 
1370 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1371 {
1372 	phdr->p_type = PT_NOTE;
1373 	phdr->p_offset = offset;
1374 	phdr->p_vaddr = 0;
1375 	phdr->p_paddr = 0;
1376 	phdr->p_filesz = sz;
1377 	phdr->p_memsz = 0;
1378 	phdr->p_flags = 0;
1379 	phdr->p_align = 0;
1380 	return;
1381 }
1382 
1383 static void fill_note(struct memelfnote *note, const char *name, int type,
1384 		unsigned int sz, void *data)
1385 {
1386 	note->name = name;
1387 	note->type = type;
1388 	note->datasz = sz;
1389 	note->data = data;
1390 	return;
1391 }
1392 
1393 /*
1394  * fill up all the fields in prstatus from the given task struct, except
1395  * registers which need to be filled up separately.
1396  */
1397 static void fill_prstatus(struct elf_prstatus *prstatus,
1398 		struct task_struct *p, long signr)
1399 {
1400 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1401 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1402 	prstatus->pr_sighold = p->blocked.sig[0];
1403 	rcu_read_lock();
1404 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1405 	rcu_read_unlock();
1406 	prstatus->pr_pid = task_pid_vnr(p);
1407 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1408 	prstatus->pr_sid = task_session_vnr(p);
1409 	if (thread_group_leader(p)) {
1410 		struct task_cputime cputime;
1411 
1412 		/*
1413 		 * This is the record for the group leader.  It shows the
1414 		 * group-wide total, not its individual thread total.
1415 		 */
1416 		thread_group_cputime(p, &cputime);
1417 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1418 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1419 	} else {
1420 		cputime_t utime, stime;
1421 
1422 		task_cputime(p, &utime, &stime);
1423 		cputime_to_timeval(utime, &prstatus->pr_utime);
1424 		cputime_to_timeval(stime, &prstatus->pr_stime);
1425 	}
1426 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1427 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1428 }
1429 
1430 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1431 		       struct mm_struct *mm)
1432 {
1433 	const struct cred *cred;
1434 	unsigned int i, len;
1435 
1436 	/* first copy the parameters from user space */
1437 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1438 
1439 	len = mm->arg_end - mm->arg_start;
1440 	if (len >= ELF_PRARGSZ)
1441 		len = ELF_PRARGSZ-1;
1442 	if (copy_from_user(&psinfo->pr_psargs,
1443 		           (const char __user *)mm->arg_start, len))
1444 		return -EFAULT;
1445 	for(i = 0; i < len; i++)
1446 		if (psinfo->pr_psargs[i] == 0)
1447 			psinfo->pr_psargs[i] = ' ';
1448 	psinfo->pr_psargs[len] = 0;
1449 
1450 	rcu_read_lock();
1451 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1452 	rcu_read_unlock();
1453 	psinfo->pr_pid = task_pid_vnr(p);
1454 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1455 	psinfo->pr_sid = task_session_vnr(p);
1456 
1457 	i = p->state ? ffz(~p->state) + 1 : 0;
1458 	psinfo->pr_state = i;
1459 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1460 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1461 	psinfo->pr_nice = task_nice(p);
1462 	psinfo->pr_flag = p->flags;
1463 	rcu_read_lock();
1464 	cred = __task_cred(p);
1465 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1466 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1467 	rcu_read_unlock();
1468 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1469 
1470 	return 0;
1471 }
1472 
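/*
 * The auxv saved in mm->saved_auxv at exec time is already a sequence of
 * (id, value) pairs terminated by AT_NULL, so the note can point straight
 * at it; the loop below only measures its length including the terminator.
 */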
1473 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1474 {
1475 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1476 	int i = 0;
1477 	do
1478 		i += 2;
1479 	while (auxv[i - 2] != AT_NULL);
1480 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1481 }
1482 
1483 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1484 		const siginfo_t *siginfo)
1485 {
1486 	mm_segment_t old_fs = get_fs();
1487 	set_fs(KERNEL_DS);
1488 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1489 	set_fs(old_fs);
1490 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1491 }
1492 
1493 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1494 /*
1495  * Format of NT_FILE note:
1496  *
1497  * long count     -- how many files are mapped
1498  * long page_size -- units for file_ofs
1499  * array of [COUNT] elements of
1500  *   long start
1501  *   long end
1502  *   long file_ofs
1503  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1504  */
1505 static int fill_files_note(struct memelfnote *note)
1506 {
1507 	struct vm_area_struct *vma;
1508 	unsigned count, size, names_ofs, remaining, n;
1509 	user_long_t *data;
1510 	user_long_t *start_end_ofs;
1511 	char *name_base, *name_curpos;
1512 
1513 	/* *Estimated* file count and total data size needed */
1514 	count = current->mm->map_count;
1515 	size = count * 64;
1516 
1517 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1518  alloc:
1519 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1520 		return -EINVAL;
1521 	size = round_up(size, PAGE_SIZE);
1522 	data = vmalloc(size);
1523 	if (!data)
1524 		return -ENOMEM;
1525 
1526 	start_end_ofs = data + 2;
1527 	name_base = name_curpos = ((char *)data) + names_ofs;
1528 	remaining = size - names_ofs;
1529 	count = 0;
1530 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1531 		struct file *file;
1532 		const char *filename;
1533 
1534 		file = vma->vm_file;
1535 		if (!file)
1536 			continue;
1537 		filename = d_path(&file->f_path, name_curpos, remaining);
1538 		if (IS_ERR(filename)) {
1539 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1540 				vfree(data);
1541 				size = size * 5 / 4;
1542 				goto alloc;
1543 			}
1544 			continue;
1545 		}
1546 
1547 		/* d_path() fills at the end, move name down */
1548 		/* n = strlen(filename) + 1: */
1549 		n = (name_curpos + remaining) - filename;
1550 		remaining = filename - name_curpos;
1551 		memmove(name_curpos, filename, n);
1552 		name_curpos += n;
1553 
1554 		*start_end_ofs++ = vma->vm_start;
1555 		*start_end_ofs++ = vma->vm_end;
1556 		*start_end_ofs++ = vma->vm_pgoff;
1557 		count++;
1558 	}
1559 
1560 	/* Now we know exact count of files, can store it */
1561 	data[0] = count;
1562 	data[1] = PAGE_SIZE;
1563 	/*
1564 	 * The count is usually less than current->mm->map_count,
1565 	 * so we need to move the filenames down.
1566 	 */
1567 	n = current->mm->map_count - count;
1568 	if (n != 0) {
1569 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1570 		memmove(name_base - shift_bytes, name_base,
1571 			name_curpos - name_base);
1572 		name_curpos -= shift_bytes;
1573 	}
1574 
1575 	size = name_curpos - (char *)data;
1576 	fill_note(note, "CORE", NT_FILE, size, data);
1577 	return 0;
1578 }
1579 
1580 #ifdef CORE_DUMP_USE_REGSET
1581 #include <linux/regset.h>
1582 
1583 struct elf_thread_core_info {
1584 	struct elf_thread_core_info *next;
1585 	struct task_struct *task;
1586 	struct elf_prstatus prstatus;
1587 	struct memelfnote notes[0];
1588 };
1589 
1590 struct elf_note_info {
1591 	struct elf_thread_core_info *thread;
1592 	struct memelfnote psinfo;
1593 	struct memelfnote signote;
1594 	struct memelfnote auxv;
1595 	struct memelfnote files;
1596 	user_siginfo_t csigdata;
1597 	size_t size;
1598 	int thread_notes;
1599 };
1600 
1601 /*
1602  * When a regset has a writeback hook, we call it on each thread before
1603  * dumping user memory.  On register window machines, this makes sure the
1604  * user memory backing the register data is up to date before we read it.
1605  */
1606 static void do_thread_regset_writeback(struct task_struct *task,
1607 				       const struct user_regset *regset)
1608 {
1609 	if (regset->writeback)
1610 		regset->writeback(task, regset, 1);
1611 }
1612 
1613 #ifndef PR_REG_SIZE
1614 #define PR_REG_SIZE(S) sizeof(S)
1615 #endif
1616 
1617 #ifndef PRSTATUS_SIZE
1618 #define PRSTATUS_SIZE(S) sizeof(S)
1619 #endif
1620 
1621 #ifndef PR_REG_PTR
1622 #define PR_REG_PTR(S) (&((S)->pr_reg))
1623 #endif
1624 
1625 #ifndef SET_PR_FPVALID
1626 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1627 #endif
1628 
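/*
 * The PR_REG_SIZE/PRSTATUS_SIZE/PR_REG_PTR/SET_PR_FPVALID fallbacks above
 * exist so that an architecture header can override them when the core-dump
 * register layout differs from the native pr_reg layout (e.g. for compat
 * tasks).
 */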
1629 static int fill_thread_core_info(struct elf_thread_core_info *t,
1630 				 const struct user_regset_view *view,
1631 				 long signr, size_t *total)
1632 {
1633 	unsigned int i;
1634 
1635 	/*
1636 	 * NT_PRSTATUS is the one special case, because the regset data
1637 	 * goes into the pr_reg field inside the note contents, rather
1638 	 * than being the whole note contents.  We fill the rest in here.
1639 	 * We assume that regset 0 is NT_PRSTATUS.
1640 	 */
1641 	fill_prstatus(&t->prstatus, t->task, signr);
1642 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1643 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1644 				    PR_REG_PTR(&t->prstatus), NULL);
1645 
1646 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1647 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1648 	*total += notesize(&t->notes[0]);
1649 
1650 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1651 
1652 	/*
1653 	 * Each other regset might generate a note too.  For each regset
1654 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1655 	 * all zero and we'll know to skip writing it later.
1656 	 */
1657 	for (i = 1; i < view->n; ++i) {
1658 		const struct user_regset *regset = &view->regsets[i];
1659 		do_thread_regset_writeback(t->task, regset);
1660 		if (regset->core_note_type && regset->get &&
1661 		    (!regset->active || regset->active(t->task, regset))) {
1662 			int ret;
1663 			size_t size = regset->n * regset->size;
1664 			void *data = kmalloc(size, GFP_KERNEL);
1665 			if (unlikely(!data))
1666 				return 0;
1667 			ret = regset->get(t->task, regset,
1668 					  0, size, data, NULL);
1669 			if (unlikely(ret))
1670 				kfree(data);
1671 			else {
1672 				if (regset->core_note_type != NT_PRFPREG)
1673 					fill_note(&t->notes[i], "LINUX",
1674 						  regset->core_note_type,
1675 						  size, data);
1676 				else {
1677 					SET_PR_FPVALID(&t->prstatus, 1);
1678 					fill_note(&t->notes[i], "CORE",
1679 						  NT_PRFPREG, size, data);
1680 				}
1681 				*total += notesize(&t->notes[i]);
1682 			}
1683 		}
1684 	}
1685 
1686 	return 1;
1687 }
1688 
1689 static int fill_note_info(struct elfhdr *elf, int phdrs,
1690 			  struct elf_note_info *info,
1691 			  const siginfo_t *siginfo, struct pt_regs *regs)
1692 {
1693 	struct task_struct *dump_task = current;
1694 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1695 	struct elf_thread_core_info *t;
1696 	struct elf_prpsinfo *psinfo;
1697 	struct core_thread *ct;
1698 	unsigned int i;
1699 
1700 	info->size = 0;
1701 	info->thread = NULL;
1702 
1703 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1704 	if (psinfo == NULL) {
1705 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1706 		return 0;
1707 	}
1708 
1709 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1710 
1711 	/*
1712 	 * Figure out how many notes we're going to need for each thread.
1713 	 */
1714 	info->thread_notes = 0;
1715 	for (i = 0; i < view->n; ++i)
1716 		if (view->regsets[i].core_note_type != 0)
1717 			++info->thread_notes;
1718 
1719 	/*
1720 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1721 	 * since it is our one special case.
1722 	 */
1723 	if (unlikely(info->thread_notes == 0) ||
1724 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1725 		WARN_ON(1);
1726 		return 0;
1727 	}
1728 
1729 	/*
1730 	 * Initialize the ELF file header.
1731 	 */
1732 	fill_elf_header(elf, phdrs,
1733 			view->e_machine, view->e_flags);
1734 
1735 	/*
1736 	 * Allocate a structure for each thread.
1737 	 */
1738 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1739 		t = kzalloc(offsetof(struct elf_thread_core_info,
1740 				     notes[info->thread_notes]),
1741 			    GFP_KERNEL);
1742 		if (unlikely(!t))
1743 			return 0;
1744 
1745 		t->task = ct->task;
1746 		if (ct->task == dump_task || !info->thread) {
1747 			t->next = info->thread;
1748 			info->thread = t;
1749 		} else {
1750 			/*
1751 			 * Make sure to keep the original task at
1752 			 * the head of the list.
1753 			 */
1754 			t->next = info->thread->next;
1755 			info->thread->next = t;
1756 		}
1757 	}
1758 
1759 	/*
1760 	 * Now fill in each thread's information.
1761 	 */
1762 	for (t = info->thread; t != NULL; t = t->next)
1763 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1764 			return 0;
1765 
1766 	/*
1767 	 * Fill in the two process-wide notes.
1768 	 */
1769 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1770 	info->size += notesize(&info->psinfo);
1771 
1772 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1773 	info->size += notesize(&info->signote);
1774 
1775 	fill_auxv_note(&info->auxv, current->mm);
1776 	info->size += notesize(&info->auxv);
1777 
1778 	if (fill_files_note(&info->files) == 0)
1779 		info->size += notesize(&info->files);
1780 
1781 	return 1;
1782 }
1783 
1784 static size_t get_note_info_size(struct elf_note_info *info)
1785 {
1786 	return info->size;
1787 }
1788 
1789 /*
1790  * Write all the notes for each thread.  When writing the first thread, the
1791  * process-wide notes are interleaved after the first thread-specific note.
1792  */
1793 static int write_note_info(struct elf_note_info *info,
1794 			   struct coredump_params *cprm)
1795 {
1796 	bool first = true;
1797 	struct elf_thread_core_info *t = info->thread;
1798 
1799 	do {
1800 		int i;
1801 
1802 		if (!writenote(&t->notes[0], cprm))
1803 			return 0;
1804 
1805 		if (first && !writenote(&info->psinfo, cprm))
1806 			return 0;
1807 		if (first && !writenote(&info->signote, cprm))
1808 			return 0;
1809 		if (first && !writenote(&info->auxv, cprm))
1810 			return 0;
1811 		if (first && info->files.data &&
1812 				!writenote(&info->files, cprm))
1813 			return 0;
1814 
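		/*
		 * The remaining regset notes for this thread (FP and other
		 * machine state) follow its NT_PRSTATUS note.
		 */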
1815 		for (i = 1; i < info->thread_notes; ++i)
1816 			if (t->notes[i].data &&
1817 			    !writenote(&t->notes[i], cprm))
1818 				return 0;
1819 
1820 		first = false;
1821 		t = t->next;
1822 	} while (t);
1823 
1824 	return 1;
1825 }
1826 
1827 static void free_note_info(struct elf_note_info *info)
1828 {
1829 	struct elf_thread_core_info *threads = info->thread;
1830 	while (threads) {
1831 		unsigned int i;
1832 		struct elf_thread_core_info *t = threads;
1833 		threads = t->next;
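		/*
		 * notes[0].data points at the prstatus embedded in t itself;
		 * only notes[1..] own separately allocated buffers.
		 */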
1834 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1835 		for (i = 1; i < info->thread_notes; ++i)
1836 			kfree(t->notes[i].data);
1837 		kfree(t);
1838 	}
1839 	kfree(info->psinfo.data);
1840 	vfree(info->files.data);
1841 }
1842 
1843 #else
1844 
1845 /* Here is the structure in which the status of each thread is captured. */
1846 struct elf_thread_status
1847 {
1848 	struct list_head list;
1849 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1850 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1851 	struct task_struct *thread;
1852 #ifdef ELF_CORE_COPY_XFPREGS
1853 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1854 #endif
1855 	struct memelfnote notes[3];
1856 	int num_notes;
1857 };
1858 
1859 /*
1860  * In order to add the specific thread information for the ELF file format,
1861  * we need to keep a linked list of every thread's pr_status and then create
1862  * a single section for them in the final core file.
1863  */
1864 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1865 {
1866 	int sz = 0;
1867 	struct task_struct *p = t->thread;
1868 	t->num_notes = 0;
1869 
1870 	fill_prstatus(&t->prstatus, p, signr);
1871 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1872 
1873 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1874 		  &(t->prstatus));
1875 	t->num_notes++;
1876 	sz += notesize(&t->notes[0]);
1877 
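	/* Emit NT_PRFPREG only if the thread has FPU state to report. */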
1878 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1879 								&t->fpu))) {
1880 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1881 			  &(t->fpu));
1882 		t->num_notes++;
1883 		sz += notesize(&t->notes[1]);
1884 	}
1885 
1886 #ifdef ELF_CORE_COPY_XFPREGS
1887 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1888 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1889 			  sizeof(t->xfpu), &t->xfpu);
1890 		t->num_notes++;
1891 		sz += notesize(&t->notes[2]);
1892 	}
1893 #endif
1894 	return sz;
1895 }
1896 
1897 struct elf_note_info {
1898 	struct memelfnote *notes;
1899 	struct memelfnote *notes_files;
1900 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1901 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1902 	struct list_head thread_list;
1903 	elf_fpregset_t *fpu;
1904 #ifdef ELF_CORE_COPY_XFPREGS
1905 	elf_fpxregset_t *xfpu;
1906 #endif
1907 	user_siginfo_t csigdata;
1908 	int thread_status_size;
1909 	int numnote;
1910 };
1911 
1912 static int elf_note_info_init(struct elf_note_info *info)
1913 {
1914 	memset(info, 0, sizeof(*info));
1915 	INIT_LIST_HEAD(&info->thread_list);
1916 
1917 	/* Allocate space for ELF notes */
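	/*
	 * At most 7 are used below: PRSTATUS, PRPSINFO, SIGINFO, AUXV, plus
	 * the optional FILES, FPU and XFPREGS notes.
	 */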
1918 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1919 	if (!info->notes)
1920 		return 0;
1921 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1922 	if (!info->psinfo)
1923 		return 0;
1924 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1925 	if (!info->prstatus)
1926 		return 0;
1927 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1928 	if (!info->fpu)
1929 		return 0;
1930 #ifdef ELF_CORE_COPY_XFPREGS
1931 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1932 	if (!info->xfpu)
1933 		return 0;
1934 #endif
1935 	return 1;
1936 }
1937 
1938 static int fill_note_info(struct elfhdr *elf, int phdrs,
1939 			  struct elf_note_info *info,
1940 			  const siginfo_t *siginfo, struct pt_regs *regs)
1941 {
1942 	struct list_head *t;
1943 	struct core_thread *ct;
1944 	struct elf_thread_status *ets;
1945 
1946 	if (!elf_note_info_init(info))
1947 		return 0;
1948 
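	/*
	 * Queue a status structure for every other thread in the dump; the
	 * dumping task itself (current) is filled in separately below.
	 */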
1949 	for (ct = current->mm->core_state->dumper.next;
1950 					ct; ct = ct->next) {
1951 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1952 		if (!ets)
1953 			return 0;
1954 
1955 		ets->thread = ct->task;
1956 		list_add(&ets->list, &info->thread_list);
1957 	}
1958 
1959 	list_for_each(t, &info->thread_list) {
1960 		int sz;
1961 
1962 		ets = list_entry(t, struct elf_thread_status, list);
1963 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1964 		info->thread_status_size += sz;
1965 	}
1966 	/* Now collect the dump for the current (dumping) task. */
1967 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1968 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1969 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1970 
1971 	/* Set up header */
1972 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1973 
1974 	/*
1975 	 * Set up the notes in similar form to SVR4 core dumps made
1976 	 * with info from their /proc.
1977 	 */
1978 
1979 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1980 		  sizeof(*info->prstatus), info->prstatus);
1981 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1982 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1983 		  sizeof(*info->psinfo), info->psinfo);
1984 
1985 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1986 	fill_auxv_note(info->notes + 3, current->mm);
1987 	info->numnote = 4;
1988 
1989 	if (fill_files_note(info->notes + info->numnote) == 0) {
1990 		info->notes_files = info->notes + info->numnote;
1991 		info->numnote++;
1992 	}
1993 
1994 	/* Try to dump the FPU. */
1995 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1996 							       info->fpu);
1997 	if (info->prstatus->pr_fpvalid)
1998 		fill_note(info->notes + info->numnote++,
1999 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2000 #ifdef ELF_CORE_COPY_XFPREGS
2001 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2002 		fill_note(info->notes + info->numnote++,
2003 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2004 			  sizeof(*info->xfpu), info->xfpu);
2005 #endif
2006 
2007 	return 1;
2008 }
2009 
2010 static size_t get_note_info_size(struct elf_note_info *info)
2011 {
2012 	int sz = 0;
2013 	int i;
2014 
2015 	for (i = 0; i < info->numnote; i++)
2016 		sz += notesize(info->notes + i);
2017 
2018 	sz += info->thread_status_size;
2019 
2020 	return sz;
2021 }
2022 
2023 static int write_note_info(struct elf_note_info *info,
2024 			   struct coredump_params *cprm)
2025 {
2026 	int i;
2027 	struct list_head *t;
2028 
2029 	for (i = 0; i < info->numnote; i++)
2030 		if (!writenote(info->notes + i, cprm))
2031 			return 0;
2032 
2033 	/* write out the thread status notes section */
2034 	list_for_each(t, &info->thread_list) {
2035 		struct elf_thread_status *tmp =
2036 				list_entry(t, struct elf_thread_status, list);
2037 
2038 		for (i = 0; i < tmp->num_notes; i++)
2039 			if (!writenote(&tmp->notes[i], cprm))
2040 				return 0;
2041 	}
2042 
2043 	return 1;
2044 }
2045 
2046 static void free_note_info(struct elf_note_info *info)
2047 {
2048 	while (!list_empty(&info->thread_list)) {
2049 		struct list_head *tmp = info->thread_list.next;
2050 		list_del(tmp);
2051 		kfree(list_entry(tmp, struct elf_thread_status, list));
2052 	}
2053 
2054 	/* Free data possibly allocated by fill_files_note(): */
2055 	if (info->notes_files)
2056 		vfree(info->notes_files->data);
2057 
2058 	kfree(info->prstatus);
2059 	kfree(info->psinfo);
2060 	kfree(info->notes);
2061 	kfree(info->fpu);
2062 #ifdef ELF_CORE_COPY_XFPREGS
2063 	kfree(info->xfpu);
2064 #endif
2065 }
2066 
2067 #endif
2068 
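/*
 * Start the VMA walk at the first user mapping, falling back to the gate
 * VMA when the mm has no mappings at all.
 */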
2069 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2070 					struct vm_area_struct *gate_vma)
2071 {
2072 	struct vm_area_struct *ret = tsk->mm->mmap;
2073 
2074 	if (ret)
2075 		return ret;
2076 	return gate_vma;
2077 }
2078 /*
2079  * Helper function for iterating across a vma list.  It ensures that the caller
2080  * will visit `gate_vma' prior to terminating the search.
2081  */
2082 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2083 					struct vm_area_struct *gate_vma)
2084 {
2085 	struct vm_area_struct *ret;
2086 
2087 	ret = this_vma->vm_next;
2088 	if (ret)
2089 		return ret;
2090 	if (this_vma == gate_vma)
2091 		return NULL;
2092 	return gate_vma;
2093 }
2094 
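/*
 * With extended numbering (e_phnum == PN_XNUM) the real segment count is
 * carried in the sh_info field of a placeholder section header; fill in
 * that header and the e_sh* fields that describe it.
 */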
2095 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2096 			     elf_addr_t e_shoff, int segs)
2097 {
2098 	elf->e_shoff = e_shoff;
2099 	elf->e_shentsize = sizeof(*shdr4extnum);
2100 	elf->e_shnum = 1;
2101 	elf->e_shstrndx = SHN_UNDEF;
2102 
2103 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2104 
2105 	shdr4extnum->sh_type = SHT_NULL;
2106 	shdr4extnum->sh_size = elf->e_shnum;
2107 	shdr4extnum->sh_link = elf->e_shstrndx;
2108 	shdr4extnum->sh_info = segs;
2109 }
2110 
2111 /*
2112  * Actual dumper
2113  *
2114  * This is a two-pass process; first we find the offsets of the bits,
2115  * and then they are actually written out.  If we exceed the core size
2116  * limit, we just truncate.
2117  */
2118 static int elf_core_dump(struct coredump_params *cprm)
2119 {
2120 	int has_dumped = 0;
2121 	mm_segment_t fs;
2122 	int segs, i;
2123 	size_t vma_data_size = 0;
2124 	struct vm_area_struct *vma, *gate_vma;
2125 	struct elfhdr *elf = NULL;
2126 	loff_t offset = 0, dataoff;
2127 	struct elf_note_info info = { };
2128 	struct elf_phdr *phdr4note = NULL;
2129 	struct elf_shdr *shdr4extnum = NULL;
2130 	Elf_Half e_phnum;
2131 	elf_addr_t e_shoff;
2132 	elf_addr_t *vma_filesz = NULL;
2133 
2134 	/*
2135 	 * We no longer stop all VM operations.
2136 	 *
2137 	 * This is because those processes that could possibly change map_count
2138 	 * or the mmap / vma pages are now blocked in do_exit on current
2139 	 * finishing this core dump.
2140 	 *
2141 	 * Only ptrace can touch these memory addresses, but it doesn't change
2142 	 * the map_count or the pages allocated. So no possibility of crashing
2143 	 * exists while dumping the mm->vm_next areas to the core file.
2144 	 */
2145 
2146 	/* alloc memory for large data structures: too large to be on stack */
2147 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2148 	if (!elf)
2149 		goto out;
2150 	/*
2151 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2152 	 * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2153 	 */
2154 	segs = current->mm->map_count;
2155 	segs += elf_core_extra_phdrs();
2156 
2157 	gate_vma = get_gate_vma(current->mm);
2158 	if (gate_vma != NULL)
2159 		segs++;
2160 
2161 	/* for notes section */
2162 	segs++;
2163 
2164 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2165 	 * this, the kernel supports extended numbering. Have a look at
2166 	 * include/linux/elf.h for further information. */
2167 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2168 
2169 	/*
2170 	 * Collect all the non-memory information about the process for the
2171 	 * notes.  This also sets up the file header.
2172 	 */
2173 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2174 		goto cleanup;
2175 
2176 	has_dumped = 1;
2177 
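	/*
	 * Note payloads and headers live in kernel memory; lift the address
	 * limit so the file write path accepts those buffers.
	 */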
2178 	fs = get_fs();
2179 	set_fs(KERNEL_DS);
2180 
2181 	offset += sizeof(*elf);				/* Elf header */
2182 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2183 
2184 	/* Write notes phdr entry */
2185 	{
2186 		size_t sz = get_note_info_size(&info);
2187 
2188 		sz += elf_coredump_extra_notes_size();
2189 
2190 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2191 		if (!phdr4note)
2192 			goto end_coredump;
2193 
2194 		fill_elf_note_phdr(phdr4note, sz, offset);
2195 		offset += sz;
2196 	}
2197 
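	/*
	 * Segment data starts at the next ELF page boundary after the
	 * headers and notes.
	 */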
2198 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2199 
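	/*
	 * One size slot per dumped VMA; segs - 1 is always enough because
	 * segs also counts the notes segment and any extra phdrs.
	 */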
2200 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2201 	if (!vma_filesz)
2202 		goto end_coredump;
2203 
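	/*
	 * First pass over the VMAs: record how much of each one will be
	 * dumped so the program headers can carry the final sizes/offsets.
	 */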
2204 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2205 			vma = next_vma(vma, gate_vma)) {
2206 		unsigned long dump_size;
2207 
2208 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2209 		vma_filesz[i++] = dump_size;
2210 		vma_data_size += dump_size;
2211 	}
2212 
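	/*
	 * Account for segment data and any extra data; the extended-numbering
	 * section header, if needed, follows them.
	 */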
2213 	offset += vma_data_size;
2214 	offset += elf_core_extra_data_size();
2215 	e_shoff = offset;
2216 
2217 	if (e_phnum == PN_XNUM) {
2218 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2219 		if (!shdr4extnum)
2220 			goto end_coredump;
2221 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2222 	}
2223 
2224 	offset = dataoff;
2225 
2226 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2227 		goto end_coredump;
2228 
2229 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2230 		goto end_coredump;
2231 
2232 	/* Write program headers for segments dump */
2233 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2234 			vma = next_vma(vma, gate_vma)) {
2235 		struct elf_phdr phdr;
2236 
2237 		phdr.p_type = PT_LOAD;
2238 		phdr.p_offset = offset;
2239 		phdr.p_vaddr = vma->vm_start;
2240 		phdr.p_paddr = 0;
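		/*
		 * p_filesz is what actually gets dumped; p_memsz spans the
		 * whole mapping.
		 */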
2241 		phdr.p_filesz = vma_filesz[i++];
2242 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2243 		offset += phdr.p_filesz;
2244 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2245 		if (vma->vm_flags & VM_WRITE)
2246 			phdr.p_flags |= PF_W;
2247 		if (vma->vm_flags & VM_EXEC)
2248 			phdr.p_flags |= PF_X;
2249 		phdr.p_align = ELF_EXEC_PAGESIZE;
2250 
2251 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2252 			goto end_coredump;
2253 	}
2254 
2255 	if (!elf_core_write_extra_phdrs(cprm, offset))
2256 		goto end_coredump;
2257 
2258 	/* Write out the notes section */
2259 	if (!write_note_info(&info, cprm))
2260 		goto end_coredump;
2261 
2262 	if (elf_coredump_extra_notes_write(cprm))
2263 		goto end_coredump;
2264 
2265 	/* Align to page */
2266 	if (!dump_skip(cprm, dataoff - cprm->written))
2267 		goto end_coredump;
2268 
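	/*
	 * Second pass: write the VMA contents a page at a time.  Pages that
	 * cannot be grabbed are emitted as holes via dump_skip().
	 */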
2269 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2270 			vma = next_vma(vma, gate_vma)) {
2271 		unsigned long addr;
2272 		unsigned long end;
2273 
2274 		end = vma->vm_start + vma_filesz[i++];
2275 
2276 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2277 			struct page *page;
2278 			int stop;
2279 
2280 			page = get_dump_page(addr);
2281 			if (page) {
2282 				void *kaddr = kmap(page);
2283 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2284 				kunmap(page);
2285 				page_cache_release(page);
2286 			} else
2287 				stop = !dump_skip(cprm, PAGE_SIZE);
2288 			if (stop)
2289 				goto end_coredump;
2290 		}
2291 	}
2292 
2293 	if (!elf_core_write_extra_data(cprm))
2294 		goto end_coredump;
2295 
2296 	if (e_phnum == PN_XNUM) {
2297 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2298 			goto end_coredump;
2299 	}
2300 
2301 end_coredump:
2302 	set_fs(fs);
2303 
2304 cleanup:
2305 	free_note_info(&info);
2306 	kfree(shdr4extnum);
2307 	kfree(vma_filesz);
2308 	kfree(phdr4note);
2309 	kfree(elf);
2310 out:
2311 	return has_dumped;
2312 }
2313 
2314 #endif		/* CONFIG_ELF_CORE */
2315 
2316 static int __init init_elf_binfmt(void)
2317 {
2318 	register_binfmt(&elf_format);
2319 	return 0;
2320 }
2321 
2322 static void __exit exit_elf_binfmt(void)
2323 {
2324 	/* Remove the ELF loader. */
2325 	unregister_binfmt(&elf_format);
2326 }
2327 
2328 core_initcall(init_elf_binfmt);
2329 module_exit(exit_elf_binfmt);
2330 MODULE_LICENSE("GPL");
2331