xref: /openbmc/linux/fs/binfmt_elf.c (revision df3305156f989339529b3d6744b898d498fb1f7b)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40 
/*
 * Default definitions for the user-visible long and siginfo types; they
 * only take effect when nothing earlier has already defined them.
 */
#if !defined(user_long_t)
#define user_long_t long
#endif

#if !defined(user_siginfo_t)
#define user_siginfo_t siginfo_t
#endif
47 
/* Forward declarations for routines defined further down in this file. */
static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);

/* The library loader only exists with CONFIG_USELIB; otherwise use NULL. */
#if defined(CONFIG_USELIB)
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * Without core dump support the handler is NULL, so that no dump is
 * ever attempted.
 */
#if defined(CONFIG_ELF_CORE)
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif
67 
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
#define ELF_MIN_ALIGN	PAGE_SIZE
#else
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#endif

/* Default used when nothing earlier has defined ELF_CORE_EFLAGS. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Address helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round down to the start of the containing unit
 *   ELF_PAGEOFFSET - offset within the containing unit
 *   ELF_PAGEALIGN  - round up to the next unit boundary
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
81 
82 static struct linux_binfmt elf_format = {
83 	.module		= THIS_MODULE,
84 	.load_binary	= load_elf_binary,
85 	.load_shlib	= load_elf_library,
86 	.core_dump	= elf_core_dump,
87 	.min_coredump	= ELF_EXEC_PAGESIZE,
88 };
89 
/* True when x, taken as an unsigned address, is at or above TASK_SIZE. */
#define BAD_ADDR(x) (TASK_SIZE <= (unsigned long)(x))
91 
92 static int set_brk(unsigned long start, unsigned long end)
93 {
94 	start = ELF_PAGEALIGN(start);
95 	end = ELF_PAGEALIGN(end);
96 	if (end > start) {
97 		unsigned long addr;
98 		addr = vm_brk(start, end - start);
99 		if (BAD_ADDR(addr))
100 			return addr;
101 	}
102 	current->mm->start_brk = current->mm->brk = end;
103 	return 0;
104 }
105 
106 /* We need to explicitly zero any fractional pages
107    after the data section (i.e. bss).  This would
108    contain the junk from the file that should not
109    be in memory
110  */
111 static int padzero(unsigned long elf_bss)
112 {
113 	unsigned long nbyte;
114 
115 	nbyte = ELF_PAGEOFFSET(elf_bss);
116 	if (nbyte) {
117 		nbyte = ELF_MIN_ALIGN - nbyte;
118 		if (clear_user((void __user *) elf_bss, nbyte))
119 			return -EFAULT;
120 	}
121 	return 0;
122 }
123 
/*
 * Macros that hide the stack growth direction while the initial user
 * stack is laid out:
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - sp moved by 'items' elements and rounded to a
 *                            16-byte boundary
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack by updating
 *                            sp in place; evaluates to the start of the
 *                            reserved area
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions such as STACK_ROUND(sp, a + b) expand as intended.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
138 
#if !defined(ELF_BASE_PLATFORM)
/*
 * AT_BASE_PLATFORM describes the "real" hardware/microarchitecture.
 * When the arch provides ELF_BASE_PLATFORM (in asm/elf.h), its value is
 * copied to the user stack the same way as AT_PLATFORM; the NULL default
 * below means there is no such string.
 */
#define ELF_BASE_PLATFORM NULL
#endif
147 
148 static int
149 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
150 		unsigned long load_addr, unsigned long interp_load_addr)
151 {
152 	unsigned long p = bprm->p;
153 	int argc = bprm->argc;
154 	int envc = bprm->envc;
155 	elf_addr_t __user *argv;
156 	elf_addr_t __user *envp;
157 	elf_addr_t __user *sp;
158 	elf_addr_t __user *u_platform;
159 	elf_addr_t __user *u_base_platform;
160 	elf_addr_t __user *u_rand_bytes;
161 	const char *k_platform = ELF_PLATFORM;
162 	const char *k_base_platform = ELF_BASE_PLATFORM;
163 	unsigned char k_rand_bytes[16];
164 	int items;
165 	elf_addr_t *elf_info;
166 	int ei_index = 0;
167 	const struct cred *cred = current_cred();
168 	struct vm_area_struct *vma;
169 
170 	/*
171 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
172 	 * evictions by the processes running on the same package. One
173 	 * thing we can do is to shuffle the initial stack for them.
174 	 */
175 
176 	p = arch_align_stack(p);
177 
178 	/*
179 	 * If this architecture has a platform capability string, copy it
180 	 * to userspace.  In some cases (Sparc), this info is impossible
181 	 * for userspace to get any other way, in others (i386) it is
182 	 * merely difficult.
183 	 */
184 	u_platform = NULL;
185 	if (k_platform) {
186 		size_t len = strlen(k_platform) + 1;
187 
188 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189 		if (__copy_to_user(u_platform, k_platform, len))
190 			return -EFAULT;
191 	}
192 
193 	/*
194 	 * If this architecture has a "base" platform capability
195 	 * string, copy it to userspace.
196 	 */
197 	u_base_platform = NULL;
198 	if (k_base_platform) {
199 		size_t len = strlen(k_base_platform) + 1;
200 
201 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 		if (__copy_to_user(u_base_platform, k_base_platform, len))
203 			return -EFAULT;
204 	}
205 
206 	/*
207 	 * Generate 16 random bytes for userspace PRNG seeding.
208 	 */
209 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
210 	u_rand_bytes = (elf_addr_t __user *)
211 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
212 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
213 		return -EFAULT;
214 
215 	/* Create the ELF interpreter info */
216 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
217 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
218 #define NEW_AUX_ENT(id, val) \
219 	do { \
220 		elf_info[ei_index++] = id; \
221 		elf_info[ei_index++] = val; \
222 	} while (0)
223 
224 #ifdef ARCH_DLINFO
225 	/*
226 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
227 	 * AUXV.
228 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
229 	 * ARCH_DLINFO changes
230 	 */
231 	ARCH_DLINFO;
232 #endif
233 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
234 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
235 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
236 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
237 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
238 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
239 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
240 	NEW_AUX_ENT(AT_FLAGS, 0);
241 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
242 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
243 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
244 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
245 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
246  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
247 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
248 #ifdef ELF_HWCAP2
249 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
250 #endif
251 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
252 	if (k_platform) {
253 		NEW_AUX_ENT(AT_PLATFORM,
254 			    (elf_addr_t)(unsigned long)u_platform);
255 	}
256 	if (k_base_platform) {
257 		NEW_AUX_ENT(AT_BASE_PLATFORM,
258 			    (elf_addr_t)(unsigned long)u_base_platform);
259 	}
260 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
261 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
262 	}
263 #undef NEW_AUX_ENT
264 	/* AT_NULL is zero; clear the rest too */
265 	memset(&elf_info[ei_index], 0,
266 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
267 
268 	/* And advance past the AT_NULL entry.  */
269 	ei_index += 2;
270 
271 	sp = STACK_ADD(p, ei_index);
272 
273 	items = (argc + 1) + (envc + 1) + 1;
274 	bprm->p = STACK_ROUND(sp, items);
275 
276 	/* Point sp at the lowest address on the stack */
277 #ifdef CONFIG_STACK_GROWSUP
278 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
279 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
280 #else
281 	sp = (elf_addr_t __user *)bprm->p;
282 #endif
283 
284 
285 	/*
286 	 * Grow the stack manually; some architectures have a limit on how
287 	 * far ahead a user-space access may be in order to grow the stack.
288 	 */
289 	vma = find_extend_vma(current->mm, bprm->p);
290 	if (!vma)
291 		return -EFAULT;
292 
293 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
294 	if (__put_user(argc, sp++))
295 		return -EFAULT;
296 	argv = sp;
297 	envp = argv + argc + 1;
298 
299 	/* Populate argv and envp */
300 	p = current->mm->arg_end = current->mm->arg_start;
301 	while (argc-- > 0) {
302 		size_t len;
303 		if (__put_user((elf_addr_t)p, argv++))
304 			return -EFAULT;
305 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306 		if (!len || len > MAX_ARG_STRLEN)
307 			return -EINVAL;
308 		p += len;
309 	}
310 	if (__put_user(0, argv))
311 		return -EFAULT;
312 	current->mm->arg_end = current->mm->env_start = p;
313 	while (envc-- > 0) {
314 		size_t len;
315 		if (__put_user((elf_addr_t)p, envp++))
316 			return -EFAULT;
317 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
318 		if (!len || len > MAX_ARG_STRLEN)
319 			return -EINVAL;
320 		p += len;
321 	}
322 	if (__put_user(0, envp))
323 		return -EFAULT;
324 	current->mm->env_end = p;
325 
326 	/* Put the elf_info on the stack in the right place.  */
327 	sp = (elf_addr_t __user *)envp + 1;
328 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
329 		return -EFAULT;
330 	return 0;
331 }
332 
333 #ifndef elf_map
334 
335 static unsigned long elf_map(struct file *filep, unsigned long addr,
336 		struct elf_phdr *eppnt, int prot, int type,
337 		unsigned long total_size)
338 {
339 	unsigned long map_addr;
340 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
341 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
342 	addr = ELF_PAGESTART(addr);
343 	size = ELF_PAGEALIGN(size);
344 
345 	/* mmap() will return -EINVAL if given a zero size, but a
346 	 * segment with zero filesize is perfectly valid */
347 	if (!size)
348 		return addr;
349 
350 	/*
351 	* total_size is the size of the ELF (interpreter) image.
352 	* The _first_ mmap needs to know the full size, otherwise
353 	* randomization might put this image into an overlapping
354 	* position with the ELF binary image. (since size < total_size)
355 	* So we first map the 'big' image - and unmap the remainder at
356 	* the end. (which unmap is needed for ELF images with holes.)
357 	*/
358 	if (total_size) {
359 		total_size = ELF_PAGEALIGN(total_size);
360 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
361 		if (!BAD_ADDR(map_addr))
362 			vm_munmap(map_addr+size, total_size-size);
363 	} else
364 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
365 
366 	return(map_addr);
367 }
368 
369 #endif /* !elf_map */
370 
371 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
372 {
373 	int i, first_idx = -1, last_idx = -1;
374 
375 	for (i = 0; i < nr; i++) {
376 		if (cmds[i].p_type == PT_LOAD) {
377 			last_idx = i;
378 			if (first_idx == -1)
379 				first_idx = i;
380 		}
381 	}
382 	if (first_idx == -1)
383 		return 0;
384 
385 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
386 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
387 }
388 
389 /**
390  * load_elf_phdrs() - load ELF program headers
391  * @elf_ex:   ELF header of the binary whose program headers should be loaded
392  * @elf_file: the opened ELF binary file
393  *
394  * Loads ELF program headers from the binary file elf_file, which has the ELF
395  * header pointed to by elf_ex, into a newly allocated array. The caller is
396  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
397  */
398 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
399 				       struct file *elf_file)
400 {
401 	struct elf_phdr *elf_phdata = NULL;
402 	int retval, size, err = -1;
403 
404 	/*
405 	 * If the size of this structure has changed, then punt, since
406 	 * we will be doing the wrong thing.
407 	 */
408 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
409 		goto out;
410 
411 	/* Sanity check the number of program headers... */
412 	if (elf_ex->e_phnum < 1 ||
413 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
414 		goto out;
415 
416 	/* ...and their total size. */
417 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
418 	if (size > ELF_MIN_ALIGN)
419 		goto out;
420 
421 	elf_phdata = kmalloc(size, GFP_KERNEL);
422 	if (!elf_phdata)
423 		goto out;
424 
425 	/* Read in the program headers */
426 	retval = kernel_read(elf_file, elf_ex->e_phoff,
427 			     (char *)elf_phdata, size);
428 	if (retval != size) {
429 		err = (retval < 0) ? retval : -EIO;
430 		goto out;
431 	}
432 
433 	/* Success! */
434 	err = 0;
435 out:
436 	if (err) {
437 		kfree(elf_phdata);
438 		elf_phdata = NULL;
439 	}
440 	return elf_phdata;
441 }
442 
443 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
444 
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - final architecture check before an ELF is loaded
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called
 * after every program header that arch_elf_pt_proc() needs to check has
 * been passed to it.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
508 
509 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
510 
511 /* This is much more generalized than the library routine read function,
512    so we keep this separate.  Technically the library read function
513    is only provided so that we can read a.out libraries that have
514    an ELF header */
515 
516 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
517 		struct file *interpreter, unsigned long *interp_map_addr,
518 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
519 {
520 	struct elf_phdr *eppnt;
521 	unsigned long load_addr = 0;
522 	int load_addr_set = 0;
523 	unsigned long last_bss = 0, elf_bss = 0;
524 	unsigned long error = ~0UL;
525 	unsigned long total_size;
526 	int i;
527 
528 	/* First of all, some simple consistency checks */
529 	if (interp_elf_ex->e_type != ET_EXEC &&
530 	    interp_elf_ex->e_type != ET_DYN)
531 		goto out;
532 	if (!elf_check_arch(interp_elf_ex))
533 		goto out;
534 	if (!interpreter->f_op->mmap)
535 		goto out;
536 
537 	total_size = total_mapping_size(interp_elf_phdata,
538 					interp_elf_ex->e_phnum);
539 	if (!total_size) {
540 		error = -EINVAL;
541 		goto out;
542 	}
543 
544 	eppnt = interp_elf_phdata;
545 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
546 		if (eppnt->p_type == PT_LOAD) {
547 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
548 			int elf_prot = 0;
549 			unsigned long vaddr = 0;
550 			unsigned long k, map_addr;
551 
552 			if (eppnt->p_flags & PF_R)
553 		    		elf_prot = PROT_READ;
554 			if (eppnt->p_flags & PF_W)
555 				elf_prot |= PROT_WRITE;
556 			if (eppnt->p_flags & PF_X)
557 				elf_prot |= PROT_EXEC;
558 			vaddr = eppnt->p_vaddr;
559 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
560 				elf_type |= MAP_FIXED;
561 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
562 				load_addr = -vaddr;
563 
564 			map_addr = elf_map(interpreter, load_addr + vaddr,
565 					eppnt, elf_prot, elf_type, total_size);
566 			total_size = 0;
567 			if (!*interp_map_addr)
568 				*interp_map_addr = map_addr;
569 			error = map_addr;
570 			if (BAD_ADDR(map_addr))
571 				goto out;
572 
573 			if (!load_addr_set &&
574 			    interp_elf_ex->e_type == ET_DYN) {
575 				load_addr = map_addr - ELF_PAGESTART(vaddr);
576 				load_addr_set = 1;
577 			}
578 
579 			/*
580 			 * Check to see if the section's size will overflow the
581 			 * allowed task size. Note that p_filesz must always be
582 			 * <= p_memsize so it's only necessary to check p_memsz.
583 			 */
584 			k = load_addr + eppnt->p_vaddr;
585 			if (BAD_ADDR(k) ||
586 			    eppnt->p_filesz > eppnt->p_memsz ||
587 			    eppnt->p_memsz > TASK_SIZE ||
588 			    TASK_SIZE - eppnt->p_memsz < k) {
589 				error = -ENOMEM;
590 				goto out;
591 			}
592 
593 			/*
594 			 * Find the end of the file mapping for this phdr, and
595 			 * keep track of the largest address we see for this.
596 			 */
597 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
598 			if (k > elf_bss)
599 				elf_bss = k;
600 
601 			/*
602 			 * Do the same thing for the memory mapping - between
603 			 * elf_bss and last_bss is the bss section.
604 			 */
605 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
606 			if (k > last_bss)
607 				last_bss = k;
608 		}
609 	}
610 
611 	if (last_bss > elf_bss) {
612 		/*
613 		 * Now fill out the bss section.  First pad the last page up
614 		 * to the page boundary, and then perform a mmap to make sure
615 		 * that there are zero-mapped pages up to and including the
616 		 * last bss page.
617 		 */
618 		if (padzero(elf_bss)) {
619 			error = -EFAULT;
620 			goto out;
621 		}
622 
623 		/* What we have mapped so far */
624 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
625 
626 		/* Map the last of the bss segment */
627 		error = vm_brk(elf_bss, last_bss - elf_bss);
628 		if (BAD_ADDR(error))
629 			goto out;
630 	}
631 
632 	error = load_addr;
633 out:
634 	return error;
635 }
636 
637 /*
638  * These are the functions used to load ELF style executables and shared
639  * libraries.  There is no binary dependent code anywhere else.
640  */
641 
642 #ifndef STACK_RND_MASK
643 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
644 #endif
645 
646 static unsigned long randomize_stack_top(unsigned long stack_top)
647 {
648 	unsigned long random_variable = 0;
649 
650 	if ((current->flags & PF_RANDOMIZE) &&
651 		!(current->personality & ADDR_NO_RANDOMIZE)) {
652 		random_variable = (unsigned long) get_random_int();
653 		random_variable &= STACK_RND_MASK;
654 		random_variable <<= PAGE_SHIFT;
655 	}
656 #ifdef CONFIG_STACK_GROWSUP
657 	return PAGE_ALIGN(stack_top) + random_variable;
658 #else
659 	return PAGE_ALIGN(stack_top) - random_variable;
660 #endif
661 }
662 
663 static int load_elf_binary(struct linux_binprm *bprm)
664 {
665 	struct file *interpreter = NULL; /* to shut gcc up */
666  	unsigned long load_addr = 0, load_bias = 0;
667 	int load_addr_set = 0;
668 	char * elf_interpreter = NULL;
669 	unsigned long error;
670 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
671 	unsigned long elf_bss, elf_brk;
672 	int retval, i;
673 	unsigned long elf_entry;
674 	unsigned long interp_load_addr = 0;
675 	unsigned long start_code, end_code, start_data, end_data;
676 	unsigned long reloc_func_desc __maybe_unused = 0;
677 	int executable_stack = EXSTACK_DEFAULT;
678 	struct pt_regs *regs = current_pt_regs();
679 	struct {
680 		struct elfhdr elf_ex;
681 		struct elfhdr interp_elf_ex;
682 	} *loc;
683 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
684 
685 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
686 	if (!loc) {
687 		retval = -ENOMEM;
688 		goto out_ret;
689 	}
690 
691 	/* Get the exec-header */
692 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
693 
694 	retval = -ENOEXEC;
695 	/* First of all, some simple consistency checks */
696 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
697 		goto out;
698 
699 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
700 		goto out;
701 	if (!elf_check_arch(&loc->elf_ex))
702 		goto out;
703 	if (!bprm->file->f_op->mmap)
704 		goto out;
705 
706 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
707 	if (!elf_phdata)
708 		goto out;
709 
710 	elf_ppnt = elf_phdata;
711 	elf_bss = 0;
712 	elf_brk = 0;
713 
714 	start_code = ~0UL;
715 	end_code = 0;
716 	start_data = 0;
717 	end_data = 0;
718 
719 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
720 		if (elf_ppnt->p_type == PT_INTERP) {
721 			/* This is the program interpreter used for
722 			 * shared libraries - for now assume that this
723 			 * is an a.out format binary
724 			 */
725 			retval = -ENOEXEC;
726 			if (elf_ppnt->p_filesz > PATH_MAX ||
727 			    elf_ppnt->p_filesz < 2)
728 				goto out_free_ph;
729 
730 			retval = -ENOMEM;
731 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
732 						  GFP_KERNEL);
733 			if (!elf_interpreter)
734 				goto out_free_ph;
735 
736 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
737 					     elf_interpreter,
738 					     elf_ppnt->p_filesz);
739 			if (retval != elf_ppnt->p_filesz) {
740 				if (retval >= 0)
741 					retval = -EIO;
742 				goto out_free_interp;
743 			}
744 			/* make sure path is NULL terminated */
745 			retval = -ENOEXEC;
746 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
747 				goto out_free_interp;
748 
749 			interpreter = open_exec(elf_interpreter);
750 			retval = PTR_ERR(interpreter);
751 			if (IS_ERR(interpreter))
752 				goto out_free_interp;
753 
754 			/*
755 			 * If the binary is not readable then enforce
756 			 * mm->dumpable = 0 regardless of the interpreter's
757 			 * permissions.
758 			 */
759 			would_dump(bprm, interpreter);
760 
761 			retval = kernel_read(interpreter, 0, bprm->buf,
762 					     BINPRM_BUF_SIZE);
763 			if (retval != BINPRM_BUF_SIZE) {
764 				if (retval >= 0)
765 					retval = -EIO;
766 				goto out_free_dentry;
767 			}
768 
769 			/* Get the exec headers */
770 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
771 			break;
772 		}
773 		elf_ppnt++;
774 	}
775 
776 	elf_ppnt = elf_phdata;
777 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
778 		switch (elf_ppnt->p_type) {
779 		case PT_GNU_STACK:
780 			if (elf_ppnt->p_flags & PF_X)
781 				executable_stack = EXSTACK_ENABLE_X;
782 			else
783 				executable_stack = EXSTACK_DISABLE_X;
784 			break;
785 
786 		case PT_LOPROC ... PT_HIPROC:
787 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
788 						  bprm->file, false,
789 						  &arch_state);
790 			if (retval)
791 				goto out_free_dentry;
792 			break;
793 		}
794 
795 	/* Some simple consistency checks for the interpreter */
796 	if (elf_interpreter) {
797 		retval = -ELIBBAD;
798 		/* Not an ELF interpreter */
799 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
800 			goto out_free_dentry;
801 		/* Verify the interpreter has a valid arch */
802 		if (!elf_check_arch(&loc->interp_elf_ex))
803 			goto out_free_dentry;
804 
805 		/* Load the interpreter program headers */
806 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
807 						   interpreter);
808 		if (!interp_elf_phdata)
809 			goto out_free_dentry;
810 
811 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
812 		elf_ppnt = interp_elf_phdata;
813 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
814 			switch (elf_ppnt->p_type) {
815 			case PT_LOPROC ... PT_HIPROC:
816 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
817 							  elf_ppnt, interpreter,
818 							  true, &arch_state);
819 				if (retval)
820 					goto out_free_dentry;
821 				break;
822 			}
823 	}
824 
825 	/*
826 	 * Allow arch code to reject the ELF at this point, whilst it's
827 	 * still possible to return an error to the code that invoked
828 	 * the exec syscall.
829 	 */
830 	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
831 	if (retval)
832 		goto out_free_dentry;
833 
834 	/* Flush all traces of the currently running executable */
835 	retval = flush_old_exec(bprm);
836 	if (retval)
837 		goto out_free_dentry;
838 
839 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
840 	   may depend on the personality.  */
841 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
842 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
843 		current->personality |= READ_IMPLIES_EXEC;
844 
845 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
846 		current->flags |= PF_RANDOMIZE;
847 
848 	setup_new_exec(bprm);
849 
850 	/* Do this so that we can load the interpreter, if need be.  We will
851 	   change some of these later */
852 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
853 				 executable_stack);
854 	if (retval < 0)
855 		goto out_free_dentry;
856 
857 	current->mm->start_stack = bprm->p;
858 
859 	/* Now we do a little grungy work by mmapping the ELF image into
860 	   the correct location in memory. */
861 	for(i = 0, elf_ppnt = elf_phdata;
862 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 		int elf_prot = 0, elf_flags;
864 		unsigned long k, vaddr;
865 
866 		if (elf_ppnt->p_type != PT_LOAD)
867 			continue;
868 
869 		if (unlikely (elf_brk > elf_bss)) {
870 			unsigned long nbyte;
871 
872 			/* There was a PT_LOAD segment with p_memsz > p_filesz
873 			   before this one. Map anonymous pages, if needed,
874 			   and clear the area.  */
875 			retval = set_brk(elf_bss + load_bias,
876 					 elf_brk + load_bias);
877 			if (retval)
878 				goto out_free_dentry;
879 			nbyte = ELF_PAGEOFFSET(elf_bss);
880 			if (nbyte) {
881 				nbyte = ELF_MIN_ALIGN - nbyte;
882 				if (nbyte > elf_brk - elf_bss)
883 					nbyte = elf_brk - elf_bss;
884 				if (clear_user((void __user *)elf_bss +
885 							load_bias, nbyte)) {
886 					/*
887 					 * This bss-zeroing can fail if the ELF
888 					 * file specifies odd protections. So
889 					 * we don't check the return value
890 					 */
891 				}
892 			}
893 		}
894 
895 		if (elf_ppnt->p_flags & PF_R)
896 			elf_prot |= PROT_READ;
897 		if (elf_ppnt->p_flags & PF_W)
898 			elf_prot |= PROT_WRITE;
899 		if (elf_ppnt->p_flags & PF_X)
900 			elf_prot |= PROT_EXEC;
901 
902 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
903 
904 		vaddr = elf_ppnt->p_vaddr;
905 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
906 			elf_flags |= MAP_FIXED;
907 		} else if (loc->elf_ex.e_type == ET_DYN) {
908 			/* Try and get dynamic programs out of the way of the
909 			 * default mmap base, as well as whatever program they
910 			 * might try to exec.  This is because the brk will
911 			 * follow the loader, and is not movable.  */
912 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
913 			/* Memory randomization might have been switched off
914 			 * in runtime via sysctl or explicit setting of
915 			 * personality flags.
916 			 * If that is the case, retain the original non-zero
917 			 * load_bias value in order to establish proper
918 			 * non-randomized mappings.
919 			 */
920 			if (current->flags & PF_RANDOMIZE)
921 				load_bias = 0;
922 			else
923 				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
924 #else
925 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
926 #endif
927 		}
928 
929 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
930 				elf_prot, elf_flags, 0);
931 		if (BAD_ADDR(error)) {
932 			retval = IS_ERR((void *)error) ?
933 				PTR_ERR((void*)error) : -EINVAL;
934 			goto out_free_dentry;
935 		}
936 
937 		if (!load_addr_set) {
938 			load_addr_set = 1;
939 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
940 			if (loc->elf_ex.e_type == ET_DYN) {
941 				load_bias += error -
942 				             ELF_PAGESTART(load_bias + vaddr);
943 				load_addr += load_bias;
944 				reloc_func_desc = load_bias;
945 			}
946 		}
947 		k = elf_ppnt->p_vaddr;
948 		if (k < start_code)
949 			start_code = k;
950 		if (start_data < k)
951 			start_data = k;
952 
953 		/*
954 		 * Check to see if the section's size will overflow the
955 		 * allowed task size. Note that p_filesz must always be
956 		 * <= p_memsz so it is only necessary to check p_memsz.
957 		 */
958 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
959 		    elf_ppnt->p_memsz > TASK_SIZE ||
960 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
961 			/* set_brk can never work. Avoid overflows. */
962 			retval = -EINVAL;
963 			goto out_free_dentry;
964 		}
965 
966 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
967 
968 		if (k > elf_bss)
969 			elf_bss = k;
970 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 			end_code = k;
972 		if (end_data < k)
973 			end_data = k;
974 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
975 		if (k > elf_brk)
976 			elf_brk = k;
977 	}
978 
979 	loc->elf_ex.e_entry += load_bias;
980 	elf_bss += load_bias;
981 	elf_brk += load_bias;
982 	start_code += load_bias;
983 	end_code += load_bias;
984 	start_data += load_bias;
985 	end_data += load_bias;
986 
987 	/* Calling set_brk effectively mmaps the pages that we need
988 	 * for the bss and break sections.  We must do this before
989 	 * mapping in the interpreter, to make sure it doesn't wind
990 	 * up getting placed where the bss needs to go.
991 	 */
992 	retval = set_brk(elf_bss, elf_brk);
993 	if (retval)
994 		goto out_free_dentry;
995 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
996 		retval = -EFAULT; /* Nobody gets to see this, but.. */
997 		goto out_free_dentry;
998 	}
999 
1000 	if (elf_interpreter) {
1001 		unsigned long interp_map_addr = 0;
1002 
1003 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1004 					    interpreter,
1005 					    &interp_map_addr,
1006 					    load_bias, interp_elf_phdata);
1007 		if (!IS_ERR((void *)elf_entry)) {
1008 			/*
1009 			 * load_elf_interp() returns relocation
1010 			 * adjustment
1011 			 */
1012 			interp_load_addr = elf_entry;
1013 			elf_entry += loc->interp_elf_ex.e_entry;
1014 		}
1015 		if (BAD_ADDR(elf_entry)) {
1016 			retval = IS_ERR((void *)elf_entry) ?
1017 					(int)elf_entry : -EINVAL;
1018 			goto out_free_dentry;
1019 		}
1020 		reloc_func_desc = interp_load_addr;
1021 
1022 		allow_write_access(interpreter);
1023 		fput(interpreter);
1024 		kfree(elf_interpreter);
1025 	} else {
1026 		elf_entry = loc->elf_ex.e_entry;
1027 		if (BAD_ADDR(elf_entry)) {
1028 			retval = -EINVAL;
1029 			goto out_free_dentry;
1030 		}
1031 	}
1032 
1033 	kfree(interp_elf_phdata);
1034 	kfree(elf_phdata);
1035 
1036 	set_binfmt(&elf_format);
1037 
1038 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1039 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1040 	if (retval < 0)
1041 		goto out;
1042 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1043 
1044 	install_exec_creds(bprm);
1045 	retval = create_elf_tables(bprm, &loc->elf_ex,
1046 			  load_addr, interp_load_addr);
1047 	if (retval < 0)
1048 		goto out;
1049 	/* N.B. passed_fileno might not be initialized? */
1050 	current->mm->end_code = end_code;
1051 	current->mm->start_code = start_code;
1052 	current->mm->start_data = start_data;
1053 	current->mm->end_data = end_data;
1054 	current->mm->start_stack = bprm->p;
1055 
1056 #ifdef arch_randomize_brk
1057 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1058 		current->mm->brk = current->mm->start_brk =
1059 			arch_randomize_brk(current->mm);
1060 #ifdef CONFIG_COMPAT_BRK
1061 		current->brk_randomized = 1;
1062 #endif
1063 	}
1064 #endif
1065 
1066 	if (current->personality & MMAP_PAGE_ZERO) {
1067 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1068 		   and some applications "depend" upon this behavior.
1069 		   Since we do not have the power to recompile these, we
1070 		   emulate the SVr4 behavior. Sigh. */
1071 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1072 				MAP_FIXED | MAP_PRIVATE, 0);
1073 	}
1074 
1075 #ifdef ELF_PLAT_INIT
1076 	/*
1077 	 * The ABI may specify that certain registers be set up in special
1078 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1079 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1080 	 * that the e_entry field is the address of the function descriptor
1081 	 * for the startup routine, rather than the address of the startup
1082 	 * routine itself.  This macro performs whatever initialization to
1083 	 * the regs structure is required as well as any relocations to the
1084 	 * function descriptor entries when executing dynamically links apps.
1085 	 */
1086 	ELF_PLAT_INIT(regs, reloc_func_desc);
1087 #endif
1088 
1089 	start_thread(regs, elf_entry, bprm->p);
1090 	retval = 0;
1091 out:
1092 	kfree(loc);
1093 out_ret:
1094 	return retval;
1095 
1096 	/* error cleanup */
1097 out_free_dentry:
1098 	kfree(interp_elf_phdata);
1099 	allow_write_access(interpreter);
1100 	if (interpreter)
1101 		fput(interpreter);
1102 out_free_interp:
1103 	kfree(elf_interpreter);
1104 out_free_ph:
1105 	kfree(elf_phdata);
1106 	goto out;
1107 }
1108 
1109 #ifdef CONFIG_USELIB
1110 /* This is really simpleminded and specialized - we are loading an
1111    a.out library that is given an ELF header. */
1112 static int load_elf_library(struct file *file)
1113 {
1114 	struct elf_phdr *elf_phdata;
1115 	struct elf_phdr *eppnt;
1116 	unsigned long elf_bss, bss, len;
1117 	int retval, error, i, j;
1118 	struct elfhdr elf_ex;
1119 
1120 	error = -ENOEXEC;
1121 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1122 	if (retval != sizeof(elf_ex))
1123 		goto out;
1124 
1125 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1126 		goto out;
1127 
1128 	/* First of all, some simple consistency checks */
1129 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1130 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1131 		goto out;
1132 
1133 	/* Now read in all of the header information */
1134 
1135 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1136 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1137 
1138 	error = -ENOMEM;
1139 	elf_phdata = kmalloc(j, GFP_KERNEL);
1140 	if (!elf_phdata)
1141 		goto out;
1142 
1143 	eppnt = elf_phdata;
1144 	error = -ENOEXEC;
1145 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1146 	if (retval != j)
1147 		goto out_free_ph;
1148 
1149 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1150 		if ((eppnt + i)->p_type == PT_LOAD)
1151 			j++;
1152 	if (j != 1)
1153 		goto out_free_ph;
1154 
1155 	while (eppnt->p_type != PT_LOAD)
1156 		eppnt++;
1157 
1158 	/* Now use mmap to map the library into memory. */
1159 	error = vm_mmap(file,
1160 			ELF_PAGESTART(eppnt->p_vaddr),
1161 			(eppnt->p_filesz +
1162 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1163 			PROT_READ | PROT_WRITE | PROT_EXEC,
1164 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1165 			(eppnt->p_offset -
1166 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1167 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1168 		goto out_free_ph;
1169 
1170 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1171 	if (padzero(elf_bss)) {
1172 		error = -EFAULT;
1173 		goto out_free_ph;
1174 	}
1175 
1176 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1177 			    ELF_MIN_ALIGN - 1);
1178 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1179 	if (bss > len)
1180 		vm_brk(len, bss - len);
1181 	error = 0;
1182 
1183 out_free_ph:
1184 	kfree(elf_phdata);
1185 out:
1186 	return error;
1187 }
1188 #endif /* #ifdef CONFIG_USELIB */
1189 
1190 #ifdef CONFIG_ELF_CORE
1191 /*
1192  * ELF core dumper
1193  *
1194  * Modelled on fs/exec.c:aout_core_dump()
1195  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1196  */
1197 
1198 /*
1199  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1200  * that are useful for post-mortem analysis are included in every core dump.
1201  * In that way we ensure that the core dump is fully interpretable later
1202  * without matching up the same kernel and hardware config to see what PC values
1203  * meant. These special mappings include - vDSO, vsyscall, and other
1204  * architecture specific mappings
1205  */
1206 static bool always_dump_vma(struct vm_area_struct *vma)
1207 {
1208 	/* Any vsyscall mappings? */
1209 	if (vma == get_gate_vma(vma->vm_mm))
1210 		return true;
1211 
1212 	/*
1213 	 * Assume that all vmas with a .name op should always be dumped.
1214 	 * If this changes, a new vm_ops field can easily be added.
1215 	 */
1216 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1217 		return true;
1218 
1219 	/*
1220 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1221 	 * such as vDSO sections.
1222 	 */
1223 	if (arch_vma_name(vma))
1224 		return true;
1225 
1226 	return false;
1227 }
1228 
1229 /*
1230  * Decide what to dump of a segment, part, all or none.
1231  */
1232 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1233 				   unsigned long mm_flags)
1234 {
1235 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1236 
1237 	/* always dump the vdso and vsyscall sections */
1238 	if (always_dump_vma(vma))
1239 		goto whole;
1240 
1241 	if (vma->vm_flags & VM_DONTDUMP)
1242 		return 0;
1243 
1244 	/* Hugetlb memory check */
1245 	if (vma->vm_flags & VM_HUGETLB) {
1246 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1247 			goto whole;
1248 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1249 			goto whole;
1250 		return 0;
1251 	}
1252 
1253 	/* Do not dump I/O mapped devices or special mappings */
1254 	if (vma->vm_flags & VM_IO)
1255 		return 0;
1256 
1257 	/* By default, dump shared memory if mapped from an anonymous file. */
1258 	if (vma->vm_flags & VM_SHARED) {
1259 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1260 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1261 			goto whole;
1262 		return 0;
1263 	}
1264 
1265 	/* Dump segments that have been written to.  */
1266 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1267 		goto whole;
1268 	if (vma->vm_file == NULL)
1269 		return 0;
1270 
1271 	if (FILTER(MAPPED_PRIVATE))
1272 		goto whole;
1273 
1274 	/*
1275 	 * If this looks like the beginning of a DSO or executable mapping,
1276 	 * check for an ELF header.  If we find one, dump the first page to
1277 	 * aid in determining what was mapped here.
1278 	 */
1279 	if (FILTER(ELF_HEADERS) &&
1280 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1281 		u32 __user *header = (u32 __user *) vma->vm_start;
1282 		u32 word;
1283 		mm_segment_t fs = get_fs();
1284 		/*
1285 		 * Doing it this way gets the constant folded by GCC.
1286 		 */
1287 		union {
1288 			u32 cmp;
1289 			char elfmag[SELFMAG];
1290 		} magic;
1291 		BUILD_BUG_ON(SELFMAG != sizeof word);
1292 		magic.elfmag[EI_MAG0] = ELFMAG0;
1293 		magic.elfmag[EI_MAG1] = ELFMAG1;
1294 		magic.elfmag[EI_MAG2] = ELFMAG2;
1295 		magic.elfmag[EI_MAG3] = ELFMAG3;
1296 		/*
1297 		 * Switch to the user "segment" for get_user(),
1298 		 * then put back what elf_core_dump() had in place.
1299 		 */
1300 		set_fs(USER_DS);
1301 		if (unlikely(get_user(word, header)))
1302 			word = 0;
1303 		set_fs(fs);
1304 		if (word == magic.cmp)
1305 			return PAGE_SIZE;
1306 	}
1307 
1308 #undef	FILTER
1309 
1310 	return 0;
1311 
1312 whole:
1313 	return vma->vm_end - vma->vm_start;
1314 }
1315 
/*
 * In-memory description of one ELF note, as handed to notesize() and
 * writenote().
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, stored as n_type on output */
	unsigned int datasz;	/* payload length in bytes (n_descsz) */
	void *data;		/* payload; not owned by this structure */
};
1324 
1325 static int notesize(struct memelfnote *en)
1326 {
1327 	int sz;
1328 
1329 	sz = sizeof(struct elf_note);
1330 	sz += roundup(strlen(en->name) + 1, 4);
1331 	sz += roundup(en->datasz, 4);
1332 
1333 	return sz;
1334 }
1335 
1336 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1337 {
1338 	struct elf_note en;
1339 	en.n_namesz = strlen(men->name) + 1;
1340 	en.n_descsz = men->datasz;
1341 	en.n_type = men->type;
1342 
1343 	return dump_emit(cprm, &en, sizeof(en)) &&
1344 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1345 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1346 }
1347 
1348 static void fill_elf_header(struct elfhdr *elf, int segs,
1349 			    u16 machine, u32 flags)
1350 {
1351 	memset(elf, 0, sizeof(*elf));
1352 
1353 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1354 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1355 	elf->e_ident[EI_DATA] = ELF_DATA;
1356 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1357 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1358 
1359 	elf->e_type = ET_CORE;
1360 	elf->e_machine = machine;
1361 	elf->e_version = EV_CURRENT;
1362 	elf->e_phoff = sizeof(struct elfhdr);
1363 	elf->e_flags = flags;
1364 	elf->e_ehsize = sizeof(struct elfhdr);
1365 	elf->e_phentsize = sizeof(struct elf_phdr);
1366 	elf->e_phnum = segs;
1367 
1368 	return;
1369 }
1370 
1371 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1372 {
1373 	phdr->p_type = PT_NOTE;
1374 	phdr->p_offset = offset;
1375 	phdr->p_vaddr = 0;
1376 	phdr->p_paddr = 0;
1377 	phdr->p_filesz = sz;
1378 	phdr->p_memsz = 0;
1379 	phdr->p_flags = 0;
1380 	phdr->p_align = 0;
1381 	return;
1382 }
1383 
1384 static void fill_note(struct memelfnote *note, const char *name, int type,
1385 		unsigned int sz, void *data)
1386 {
1387 	note->name = name;
1388 	note->type = type;
1389 	note->datasz = sz;
1390 	note->data = data;
1391 	return;
1392 }
1393 
1394 /*
1395  * fill up all the fields in prstatus from the given task struct, except
1396  * registers which need to be filled up separately.
1397  */
1398 static void fill_prstatus(struct elf_prstatus *prstatus,
1399 		struct task_struct *p, long signr)
1400 {
1401 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1402 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1403 	prstatus->pr_sighold = p->blocked.sig[0];
1404 	rcu_read_lock();
1405 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1406 	rcu_read_unlock();
1407 	prstatus->pr_pid = task_pid_vnr(p);
1408 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1409 	prstatus->pr_sid = task_session_vnr(p);
1410 	if (thread_group_leader(p)) {
1411 		struct task_cputime cputime;
1412 
1413 		/*
1414 		 * This is the record for the group leader.  It shows the
1415 		 * group-wide total, not its individual thread total.
1416 		 */
1417 		thread_group_cputime(p, &cputime);
1418 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1419 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1420 	} else {
1421 		cputime_t utime, stime;
1422 
1423 		task_cputime(p, &utime, &stime);
1424 		cputime_to_timeval(utime, &prstatus->pr_utime);
1425 		cputime_to_timeval(stime, &prstatus->pr_stime);
1426 	}
1427 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1428 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1429 }
1430 
1431 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1432 		       struct mm_struct *mm)
1433 {
1434 	const struct cred *cred;
1435 	unsigned int i, len;
1436 
1437 	/* first copy the parameters from user space */
1438 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1439 
1440 	len = mm->arg_end - mm->arg_start;
1441 	if (len >= ELF_PRARGSZ)
1442 		len = ELF_PRARGSZ-1;
1443 	if (copy_from_user(&psinfo->pr_psargs,
1444 		           (const char __user *)mm->arg_start, len))
1445 		return -EFAULT;
1446 	for(i = 0; i < len; i++)
1447 		if (psinfo->pr_psargs[i] == 0)
1448 			psinfo->pr_psargs[i] = ' ';
1449 	psinfo->pr_psargs[len] = 0;
1450 
1451 	rcu_read_lock();
1452 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1453 	rcu_read_unlock();
1454 	psinfo->pr_pid = task_pid_vnr(p);
1455 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1456 	psinfo->pr_sid = task_session_vnr(p);
1457 
1458 	i = p->state ? ffz(~p->state) + 1 : 0;
1459 	psinfo->pr_state = i;
1460 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1461 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1462 	psinfo->pr_nice = task_nice(p);
1463 	psinfo->pr_flag = p->flags;
1464 	rcu_read_lock();
1465 	cred = __task_cred(p);
1466 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1467 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1468 	rcu_read_unlock();
1469 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1470 
1471 	return 0;
1472 }
1473 
1474 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1475 {
1476 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1477 	int i = 0;
1478 	do
1479 		i += 2;
1480 	while (auxv[i - 2] != AT_NULL);
1481 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1482 }
1483 
1484 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1485 		const siginfo_t *siginfo)
1486 {
1487 	mm_segment_t old_fs = get_fs();
1488 	set_fs(KERNEL_DS);
1489 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1490 	set_fs(old_fs);
1491 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1492 }
1493 
1494 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1495 /*
1496  * Format of NT_FILE note:
1497  *
1498  * long count     -- how many files are mapped
1499  * long page_size -- units for file_ofs
1500  * array of [COUNT] elements of
1501  *   long start
1502  *   long end
1503  *   long file_ofs
1504  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1505  */
1506 static int fill_files_note(struct memelfnote *note)
1507 {
1508 	struct vm_area_struct *vma;
1509 	unsigned count, size, names_ofs, remaining, n;
1510 	user_long_t *data;
1511 	user_long_t *start_end_ofs;
1512 	char *name_base, *name_curpos;
1513 
1514 	/* *Estimated* file count and total data size needed */
1515 	count = current->mm->map_count;
1516 	size = count * 64;
1517 
1518 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1519  alloc:
1520 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1521 		return -EINVAL;
1522 	size = round_up(size, PAGE_SIZE);
1523 	data = vmalloc(size);
1524 	if (!data)
1525 		return -ENOMEM;
1526 
1527 	start_end_ofs = data + 2;
1528 	name_base = name_curpos = ((char *)data) + names_ofs;
1529 	remaining = size - names_ofs;
1530 	count = 0;
1531 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1532 		struct file *file;
1533 		const char *filename;
1534 
1535 		file = vma->vm_file;
1536 		if (!file)
1537 			continue;
1538 		filename = d_path(&file->f_path, name_curpos, remaining);
1539 		if (IS_ERR(filename)) {
1540 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1541 				vfree(data);
1542 				size = size * 5 / 4;
1543 				goto alloc;
1544 			}
1545 			continue;
1546 		}
1547 
1548 		/* d_path() fills at the end, move name down */
1549 		/* n = strlen(filename) + 1: */
1550 		n = (name_curpos + remaining) - filename;
1551 		remaining = filename - name_curpos;
1552 		memmove(name_curpos, filename, n);
1553 		name_curpos += n;
1554 
1555 		*start_end_ofs++ = vma->vm_start;
1556 		*start_end_ofs++ = vma->vm_end;
1557 		*start_end_ofs++ = vma->vm_pgoff;
1558 		count++;
1559 	}
1560 
1561 	/* Now we know exact count of files, can store it */
1562 	data[0] = count;
1563 	data[1] = PAGE_SIZE;
1564 	/*
1565 	 * Count usually is less than current->mm->map_count,
1566 	 * we need to move filenames down.
1567 	 */
1568 	n = current->mm->map_count - count;
1569 	if (n != 0) {
1570 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1571 		memmove(name_base - shift_bytes, name_base,
1572 			name_curpos - name_base);
1573 		name_curpos -= shift_bytes;
1574 	}
1575 
1576 	size = name_curpos - (char *)data;
1577 	fill_note(note, "CORE", NT_FILE, size, data);
1578 	return 0;
1579 }
1580 
1581 #ifdef CORE_DUMP_USE_REGSET
1582 #include <linux/regset.h>
1583 
1584 struct elf_thread_core_info {
1585 	struct elf_thread_core_info *next;
1586 	struct task_struct *task;
1587 	struct elf_prstatus prstatus;
1588 	struct memelfnote notes[0];
1589 };
1590 
1591 struct elf_note_info {
1592 	struct elf_thread_core_info *thread;
1593 	struct memelfnote psinfo;
1594 	struct memelfnote signote;
1595 	struct memelfnote auxv;
1596 	struct memelfnote files;
1597 	user_siginfo_t csigdata;
1598 	size_t size;
1599 	int thread_notes;
1600 };
1601 
1602 /*
1603  * When a regset has a writeback hook, we call it on each thread before
1604  * dumping user memory.  On register window machines, this makes sure the
1605  * user memory backing the register data is up to date before we read it.
1606  */
1607 static void do_thread_regset_writeback(struct task_struct *task,
1608 				       const struct user_regset *regset)
1609 {
1610 	if (regset->writeback)
1611 		regset->writeback(task, regset, 1);
1612 }
1613 
/*
 * Fallback accessors for struct elf_prstatus.  Each one is used only
 * when nothing earlier in the build has defined a macro of that name.
 */

/* Size of the register block inside a prstatus. */
#if !defined(PR_REG_SIZE)
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of the prstatus note payload. */
#if !defined(PRSTATUS_SIZE)
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register block inside a prstatus. */
#if !defined(PR_REG_PTR)
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Record whether floating point state accompanies the prstatus. */
#if !defined(SET_PR_FPVALID)
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1629 
1630 static int fill_thread_core_info(struct elf_thread_core_info *t,
1631 				 const struct user_regset_view *view,
1632 				 long signr, size_t *total)
1633 {
1634 	unsigned int i;
1635 
1636 	/*
1637 	 * NT_PRSTATUS is the one special case, because the regset data
1638 	 * goes into the pr_reg field inside the note contents, rather
1639 	 * than being the whole note contents.  We fill the reset in here.
1640 	 * We assume that regset 0 is NT_PRSTATUS.
1641 	 */
1642 	fill_prstatus(&t->prstatus, t->task, signr);
1643 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1644 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1645 				    PR_REG_PTR(&t->prstatus), NULL);
1646 
1647 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1648 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1649 	*total += notesize(&t->notes[0]);
1650 
1651 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1652 
1653 	/*
1654 	 * Each other regset might generate a note too.  For each regset
1655 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1656 	 * all zero and we'll know to skip writing it later.
1657 	 */
1658 	for (i = 1; i < view->n; ++i) {
1659 		const struct user_regset *regset = &view->regsets[i];
1660 		do_thread_regset_writeback(t->task, regset);
1661 		if (regset->core_note_type && regset->get &&
1662 		    (!regset->active || regset->active(t->task, regset))) {
1663 			int ret;
1664 			size_t size = regset->n * regset->size;
1665 			void *data = kmalloc(size, GFP_KERNEL);
1666 			if (unlikely(!data))
1667 				return 0;
1668 			ret = regset->get(t->task, regset,
1669 					  0, size, data, NULL);
1670 			if (unlikely(ret))
1671 				kfree(data);
1672 			else {
1673 				if (regset->core_note_type != NT_PRFPREG)
1674 					fill_note(&t->notes[i], "LINUX",
1675 						  regset->core_note_type,
1676 						  size, data);
1677 				else {
1678 					SET_PR_FPVALID(&t->prstatus, 1);
1679 					fill_note(&t->notes[i], "CORE",
1680 						  NT_PRFPREG, size, data);
1681 				}
1682 				*total += notesize(&t->notes[i]);
1683 			}
1684 		}
1685 	}
1686 
1687 	return 1;
1688 }
1689 
1690 static int fill_note_info(struct elfhdr *elf, int phdrs,
1691 			  struct elf_note_info *info,
1692 			  const siginfo_t *siginfo, struct pt_regs *regs)
1693 {
1694 	struct task_struct *dump_task = current;
1695 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1696 	struct elf_thread_core_info *t;
1697 	struct elf_prpsinfo *psinfo;
1698 	struct core_thread *ct;
1699 	unsigned int i;
1700 
1701 	info->size = 0;
1702 	info->thread = NULL;
1703 
1704 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1705 	if (psinfo == NULL) {
1706 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1707 		return 0;
1708 	}
1709 
1710 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1711 
1712 	/*
1713 	 * Figure out how many notes we're going to need for each thread.
1714 	 */
1715 	info->thread_notes = 0;
1716 	for (i = 0; i < view->n; ++i)
1717 		if (view->regsets[i].core_note_type != 0)
1718 			++info->thread_notes;
1719 
1720 	/*
1721 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1722 	 * since it is our one special case.
1723 	 */
1724 	if (unlikely(info->thread_notes == 0) ||
1725 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1726 		WARN_ON(1);
1727 		return 0;
1728 	}
1729 
1730 	/*
1731 	 * Initialize the ELF file header.
1732 	 */
1733 	fill_elf_header(elf, phdrs,
1734 			view->e_machine, view->e_flags);
1735 
1736 	/*
1737 	 * Allocate a structure for each thread.
1738 	 */
1739 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1740 		t = kzalloc(offsetof(struct elf_thread_core_info,
1741 				     notes[info->thread_notes]),
1742 			    GFP_KERNEL);
1743 		if (unlikely(!t))
1744 			return 0;
1745 
1746 		t->task = ct->task;
1747 		if (ct->task == dump_task || !info->thread) {
1748 			t->next = info->thread;
1749 			info->thread = t;
1750 		} else {
1751 			/*
1752 			 * Make sure to keep the original task at
1753 			 * the head of the list.
1754 			 */
1755 			t->next = info->thread->next;
1756 			info->thread->next = t;
1757 		}
1758 	}
1759 
1760 	/*
1761 	 * Now fill in each thread's information.
1762 	 */
1763 	for (t = info->thread; t != NULL; t = t->next)
1764 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1765 			return 0;
1766 
1767 	/*
1768 	 * Fill in the two process-wide notes.
1769 	 */
1770 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1771 	info->size += notesize(&info->psinfo);
1772 
1773 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1774 	info->size += notesize(&info->signote);
1775 
1776 	fill_auxv_note(&info->auxv, current->mm);
1777 	info->size += notesize(&info->auxv);
1778 
1779 	if (fill_files_note(&info->files) == 0)
1780 		info->size += notesize(&info->files);
1781 
1782 	return 1;
1783 }
1784 
1785 static size_t get_note_info_size(struct elf_note_info *info)
1786 {
1787 	return info->size;
1788 }
1789 
1790 /*
1791  * Write all the notes for each thread.  When writing the first thread, the
1792  * process-wide notes are interleaved after the first thread-specific note.
1793  */
1794 static int write_note_info(struct elf_note_info *info,
1795 			   struct coredump_params *cprm)
1796 {
1797 	bool first = true;
1798 	struct elf_thread_core_info *t = info->thread;
1799 
1800 	do {
1801 		int i;
1802 
1803 		if (!writenote(&t->notes[0], cprm))
1804 			return 0;
1805 
1806 		if (first && !writenote(&info->psinfo, cprm))
1807 			return 0;
1808 		if (first && !writenote(&info->signote, cprm))
1809 			return 0;
1810 		if (first && !writenote(&info->auxv, cprm))
1811 			return 0;
1812 		if (first && info->files.data &&
1813 				!writenote(&info->files, cprm))
1814 			return 0;
1815 
1816 		for (i = 1; i < info->thread_notes; ++i)
1817 			if (t->notes[i].data &&
1818 			    !writenote(&t->notes[i], cprm))
1819 				return 0;
1820 
1821 		first = false;
1822 		t = t->next;
1823 	} while (t);
1824 
1825 	return 1;
1826 }
1827 
1828 static void free_note_info(struct elf_note_info *info)
1829 {
1830 	struct elf_thread_core_info *threads = info->thread;
1831 	while (threads) {
1832 		unsigned int i;
1833 		struct elf_thread_core_info *t = threads;
1834 		threads = t->next;
1835 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1836 		for (i = 1; i < info->thread_notes; ++i)
1837 			kfree(t->notes[i].data);
1838 		kfree(t);
1839 	}
1840 	kfree(info->psinfo.data);
1841 	vfree(info->files.data);
1842 }
1843 
1844 #else
1845 
1846 /* Here is the structure in which status of each thread is captured. */
1847 struct elf_thread_status
1848 {
1849 	struct list_head list;
1850 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1851 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1852 	struct task_struct *thread;
1853 #ifdef ELF_CORE_COPY_XFPREGS
1854 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1855 #endif
1856 	struct memelfnote notes[3];
1857 	int num_notes;
1858 };
1859 
1860 /*
1861  * In order to add the specific thread information for the elf file format,
1862  * we need to keep a linked list of every threads pr_status and then create
1863  * a single section for them in the final core file.
1864  */
1865 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1866 {
1867 	int sz = 0;
1868 	struct task_struct *p = t->thread;
1869 	t->num_notes = 0;
1870 
1871 	fill_prstatus(&t->prstatus, p, signr);
1872 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1873 
1874 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1875 		  &(t->prstatus));
1876 	t->num_notes++;
1877 	sz += notesize(&t->notes[0]);
1878 
1879 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1880 								&t->fpu))) {
1881 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1882 			  &(t->fpu));
1883 		t->num_notes++;
1884 		sz += notesize(&t->notes[1]);
1885 	}
1886 
1887 #ifdef ELF_CORE_COPY_XFPREGS
1888 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1889 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1890 			  sizeof(t->xfpu), &t->xfpu);
1891 		t->num_notes++;
1892 		sz += notesize(&t->notes[2]);
1893 	}
1894 #endif
1895 	return sz;
1896 }
1897 
1898 struct elf_note_info {
1899 	struct memelfnote *notes;
1900 	struct memelfnote *notes_files;
1901 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1902 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1903 	struct list_head thread_list;
1904 	elf_fpregset_t *fpu;
1905 #ifdef ELF_CORE_COPY_XFPREGS
1906 	elf_fpxregset_t *xfpu;
1907 #endif
1908 	user_siginfo_t csigdata;
1909 	int thread_status_size;
1910 	int numnote;
1911 };
1912 
1913 static int elf_note_info_init(struct elf_note_info *info)
1914 {
1915 	memset(info, 0, sizeof(*info));
1916 	INIT_LIST_HEAD(&info->thread_list);
1917 
1918 	/* Allocate space for ELF notes */
1919 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1920 	if (!info->notes)
1921 		return 0;
1922 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1923 	if (!info->psinfo)
1924 		return 0;
1925 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1926 	if (!info->prstatus)
1927 		return 0;
1928 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1929 	if (!info->fpu)
1930 		return 0;
1931 #ifdef ELF_CORE_COPY_XFPREGS
1932 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1933 	if (!info->xfpu)
1934 		return 0;
1935 #endif
1936 	return 1;
1937 }
1938 
1939 static int fill_note_info(struct elfhdr *elf, int phdrs,
1940 			  struct elf_note_info *info,
1941 			  const siginfo_t *siginfo, struct pt_regs *regs)
1942 {
1943 	struct list_head *t;
1944 	struct core_thread *ct;
1945 	struct elf_thread_status *ets;
1946 
1947 	if (!elf_note_info_init(info))
1948 		return 0;
1949 
1950 	for (ct = current->mm->core_state->dumper.next;
1951 					ct; ct = ct->next) {
1952 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1953 		if (!ets)
1954 			return 0;
1955 
1956 		ets->thread = ct->task;
1957 		list_add(&ets->list, &info->thread_list);
1958 	}
1959 
1960 	list_for_each(t, &info->thread_list) {
1961 		int sz;
1962 
1963 		ets = list_entry(t, struct elf_thread_status, list);
1964 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1965 		info->thread_status_size += sz;
1966 	}
1967 	/* now collect the dump for the current */
1968 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1969 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1970 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1971 
1972 	/* Set up header */
1973 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1974 
1975 	/*
1976 	 * Set up the notes in similar form to SVR4 core dumps made
1977 	 * with info from their /proc.
1978 	 */
1979 
1980 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1981 		  sizeof(*info->prstatus), info->prstatus);
1982 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1983 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1984 		  sizeof(*info->psinfo), info->psinfo);
1985 
1986 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1987 	fill_auxv_note(info->notes + 3, current->mm);
1988 	info->numnote = 4;
1989 
1990 	if (fill_files_note(info->notes + info->numnote) == 0) {
1991 		info->notes_files = info->notes + info->numnote;
1992 		info->numnote++;
1993 	}
1994 
1995 	/* Try to dump the FPU. */
1996 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1997 							       info->fpu);
1998 	if (info->prstatus->pr_fpvalid)
1999 		fill_note(info->notes + info->numnote++,
2000 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2001 #ifdef ELF_CORE_COPY_XFPREGS
2002 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2003 		fill_note(info->notes + info->numnote++,
2004 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2005 			  sizeof(*info->xfpu), info->xfpu);
2006 #endif
2007 
2008 	return 1;
2009 }
2010 
2011 static size_t get_note_info_size(struct elf_note_info *info)
2012 {
2013 	int sz = 0;
2014 	int i;
2015 
2016 	for (i = 0; i < info->numnote; i++)
2017 		sz += notesize(info->notes + i);
2018 
2019 	sz += info->thread_status_size;
2020 
2021 	return sz;
2022 }
2023 
2024 static int write_note_info(struct elf_note_info *info,
2025 			   struct coredump_params *cprm)
2026 {
2027 	int i;
2028 	struct list_head *t;
2029 
2030 	for (i = 0; i < info->numnote; i++)
2031 		if (!writenote(info->notes + i, cprm))
2032 			return 0;
2033 
2034 	/* write out the thread status notes section */
2035 	list_for_each(t, &info->thread_list) {
2036 		struct elf_thread_status *tmp =
2037 				list_entry(t, struct elf_thread_status, list);
2038 
2039 		for (i = 0; i < tmp->num_notes; i++)
2040 			if (!writenote(&tmp->notes[i], cprm))
2041 				return 0;
2042 	}
2043 
2044 	return 1;
2045 }
2046 
2047 static void free_note_info(struct elf_note_info *info)
2048 {
2049 	while (!list_empty(&info->thread_list)) {
2050 		struct list_head *tmp = info->thread_list.next;
2051 		list_del(tmp);
2052 		kfree(list_entry(tmp, struct elf_thread_status, list));
2053 	}
2054 
2055 	/* Free data possibly allocated by fill_files_note(): */
2056 	if (info->notes_files)
2057 		vfree(info->notes_files->data);
2058 
2059 	kfree(info->prstatus);
2060 	kfree(info->psinfo);
2061 	kfree(info->notes);
2062 	kfree(info->fpu);
2063 #ifdef ELF_CORE_COPY_XFPREGS
2064 	kfree(info->xfpu);
2065 #endif
2066 }
2067 
2068 #endif
2069 
2070 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2071 					struct vm_area_struct *gate_vma)
2072 {
2073 	struct vm_area_struct *ret = tsk->mm->mmap;
2074 
2075 	if (ret)
2076 		return ret;
2077 	return gate_vma;
2078 }
2079 /*
2080  * Helper function for iterating across a vma list.  It ensures that the caller
2081  * will visit `gate_vma' prior to terminating the search.
2082  */
2083 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2084 					struct vm_area_struct *gate_vma)
2085 {
2086 	struct vm_area_struct *ret;
2087 
2088 	ret = this_vma->vm_next;
2089 	if (ret)
2090 		return ret;
2091 	if (this_vma == gate_vma)
2092 		return NULL;
2093 	return gate_vma;
2094 }
2095 
2096 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2097 			     elf_addr_t e_shoff, int segs)
2098 {
2099 	elf->e_shoff = e_shoff;
2100 	elf->e_shentsize = sizeof(*shdr4extnum);
2101 	elf->e_shnum = 1;
2102 	elf->e_shstrndx = SHN_UNDEF;
2103 
2104 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2105 
2106 	shdr4extnum->sh_type = SHT_NULL;
2107 	shdr4extnum->sh_size = elf->e_shnum;
2108 	shdr4extnum->sh_link = elf->e_shstrndx;
2109 	shdr4extnum->sh_info = segs;
2110 }
2111 
2112 /*
2113  * Actual dumper
2114  *
2115  * This is a two-pass process; first we find the offsets of the bits,
2116  * and then they are actually written out.  If we run out of core limit
2117  * we just truncate.
2118  */
2119 static int elf_core_dump(struct coredump_params *cprm)
2120 {
2121 	int has_dumped = 0;
2122 	mm_segment_t fs;
2123 	int segs, i;
2124 	size_t vma_data_size = 0;
2125 	struct vm_area_struct *vma, *gate_vma;
2126 	struct elfhdr *elf = NULL;
2127 	loff_t offset = 0, dataoff;
2128 	struct elf_note_info info = { };
2129 	struct elf_phdr *phdr4note = NULL;
2130 	struct elf_shdr *shdr4extnum = NULL;
2131 	Elf_Half e_phnum;
2132 	elf_addr_t e_shoff;
2133 	elf_addr_t *vma_filesz = NULL;
2134 
2135 	/*
2136 	 * We no longer stop all VM operations.
2137 	 *
2138 	 * This is because those proceses that could possibly change map_count
2139 	 * or the mmap / vma pages are now blocked in do_exit on current
2140 	 * finishing this core dump.
2141 	 *
2142 	 * Only ptrace can touch these memory addresses, but it doesn't change
2143 	 * the map_count or the pages allocated. So no possibility of crashing
2144 	 * exists while dumping the mm->vm_next areas to the core file.
2145 	 */
2146 
2147 	/* alloc memory for large data structures: too large to be on stack */
2148 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2149 	if (!elf)
2150 		goto out;
2151 	/*
2152 	 * The number of segs are recored into ELF header as 16bit value.
2153 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2154 	 */
2155 	segs = current->mm->map_count;
2156 	segs += elf_core_extra_phdrs();
2157 
2158 	gate_vma = get_gate_vma(current->mm);
2159 	if (gate_vma != NULL)
2160 		segs++;
2161 
2162 	/* for notes section */
2163 	segs++;
2164 
2165 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2166 	 * this, kernel supports extended numbering. Have a look at
2167 	 * include/linux/elf.h for further information. */
2168 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2169 
2170 	/*
2171 	 * Collect all the non-memory information about the process for the
2172 	 * notes.  This also sets up the file header.
2173 	 */
2174 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2175 		goto cleanup;
2176 
2177 	has_dumped = 1;
2178 
2179 	fs = get_fs();
2180 	set_fs(KERNEL_DS);
2181 
2182 	offset += sizeof(*elf);				/* Elf header */
2183 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2184 
2185 	/* Write notes phdr entry */
2186 	{
2187 		size_t sz = get_note_info_size(&info);
2188 
2189 		sz += elf_coredump_extra_notes_size();
2190 
2191 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2192 		if (!phdr4note)
2193 			goto end_coredump;
2194 
2195 		fill_elf_note_phdr(phdr4note, sz, offset);
2196 		offset += sz;
2197 	}
2198 
2199 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2200 
2201 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2202 	if (!vma_filesz)
2203 		goto end_coredump;
2204 
2205 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2206 			vma = next_vma(vma, gate_vma)) {
2207 		unsigned long dump_size;
2208 
2209 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2210 		vma_filesz[i++] = dump_size;
2211 		vma_data_size += dump_size;
2212 	}
2213 
2214 	offset += vma_data_size;
2215 	offset += elf_core_extra_data_size();
2216 	e_shoff = offset;
2217 
2218 	if (e_phnum == PN_XNUM) {
2219 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2220 		if (!shdr4extnum)
2221 			goto end_coredump;
2222 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2223 	}
2224 
2225 	offset = dataoff;
2226 
2227 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2228 		goto end_coredump;
2229 
2230 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2231 		goto end_coredump;
2232 
2233 	/* Write program headers for segments dump */
2234 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2235 			vma = next_vma(vma, gate_vma)) {
2236 		struct elf_phdr phdr;
2237 
2238 		phdr.p_type = PT_LOAD;
2239 		phdr.p_offset = offset;
2240 		phdr.p_vaddr = vma->vm_start;
2241 		phdr.p_paddr = 0;
2242 		phdr.p_filesz = vma_filesz[i++];
2243 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2244 		offset += phdr.p_filesz;
2245 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2246 		if (vma->vm_flags & VM_WRITE)
2247 			phdr.p_flags |= PF_W;
2248 		if (vma->vm_flags & VM_EXEC)
2249 			phdr.p_flags |= PF_X;
2250 		phdr.p_align = ELF_EXEC_PAGESIZE;
2251 
2252 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2253 			goto end_coredump;
2254 	}
2255 
2256 	if (!elf_core_write_extra_phdrs(cprm, offset))
2257 		goto end_coredump;
2258 
2259  	/* write out the notes section */
2260 	if (!write_note_info(&info, cprm))
2261 		goto end_coredump;
2262 
2263 	if (elf_coredump_extra_notes_write(cprm))
2264 		goto end_coredump;
2265 
2266 	/* Align to page */
2267 	if (!dump_skip(cprm, dataoff - cprm->written))
2268 		goto end_coredump;
2269 
2270 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2271 			vma = next_vma(vma, gate_vma)) {
2272 		unsigned long addr;
2273 		unsigned long end;
2274 
2275 		end = vma->vm_start + vma_filesz[i++];
2276 
2277 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2278 			struct page *page;
2279 			int stop;
2280 
2281 			page = get_dump_page(addr);
2282 			if (page) {
2283 				void *kaddr = kmap(page);
2284 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2285 				kunmap(page);
2286 				page_cache_release(page);
2287 			} else
2288 				stop = !dump_skip(cprm, PAGE_SIZE);
2289 			if (stop)
2290 				goto end_coredump;
2291 		}
2292 	}
2293 
2294 	if (!elf_core_write_extra_data(cprm))
2295 		goto end_coredump;
2296 
2297 	if (e_phnum == PN_XNUM) {
2298 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2299 			goto end_coredump;
2300 	}
2301 
2302 end_coredump:
2303 	set_fs(fs);
2304 
2305 cleanup:
2306 	free_note_info(&info);
2307 	kfree(shdr4extnum);
2308 	kfree(vma_filesz);
2309 	kfree(phdr4note);
2310 	kfree(elf);
2311 out:
2312 	return has_dumped;
2313 }
2314 
2315 #endif		/* CONFIG_ELF_CORE */
2316 
/*
 * Init hook: register elf_format with register_binfmt().
 * Always returns 0.
 */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2322 
/* Exit hook: undo init_elf_binfmt() by unregistering elf_format. */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2328 
/*
 * Wire the hooks up: init_elf_binfmt() is run through core_initcall(),
 * exit_elf_binfmt() is the module exit handler.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2332