xref: /openbmc/linux/fs/binfmt_elf.c (revision cc8bbe1a)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42 
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49 
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 				int, int, unsigned long);
53 
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59 
60 /*
61  * If we don't support core dumping, then supply a NULL so we
62  * don't even try.
63  */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump	NULL
68 #endif
69 
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN	PAGE_SIZE
74 #endif
75 
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS	0
78 #endif
79 
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
83 
84 static struct linux_binfmt elf_format = {
85 	.module		= THIS_MODULE,
86 	.load_binary	= load_elf_binary,
87 	.load_shlib	= load_elf_library,
88 	.core_dump	= elf_core_dump,
89 	.min_coredump	= ELF_EXEC_PAGESIZE,
90 };
91 
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93 
94 static int set_brk(unsigned long start, unsigned long end)
95 {
96 	start = ELF_PAGEALIGN(start);
97 	end = ELF_PAGEALIGN(end);
98 	if (end > start) {
99 		unsigned long addr;
100 		addr = vm_brk(start, end - start);
101 		if (BAD_ADDR(addr))
102 			return addr;
103 	}
104 	current->mm->start_brk = current->mm->brk = end;
105 	return 0;
106 }
107 
108 /* We need to explicitly zero any fractional pages
109    after the data section (i.e. bss).  This would
110    contain the junk from the file that should not
111    be in memory
112  */
113 static int padzero(unsigned long elf_bss)
114 {
115 	unsigned long nbyte;
116 
117 	nbyte = ELF_PAGEOFFSET(elf_bss);
118 	if (nbyte) {
119 		nbyte = ELF_MIN_ALIGN - nbyte;
120 		if (clear_user((void __user *) elf_bss, nbyte))
121 			return -EFAULT;
122 	}
123 	return 0;
124 }
125 
126 /* Let's use some macros to make this stack manipulation a little clearer */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 	old_sp; })
134 #else
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 	(((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
139 #endif
140 
141 #ifndef ELF_BASE_PLATFORM
142 /*
143  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145  * will be copied to the user stack in the same manner as AT_PLATFORM.
146  */
147 #define ELF_BASE_PLATFORM NULL
148 #endif
149 
150 static int
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 		unsigned long load_addr, unsigned long interp_load_addr)
153 {
154 	unsigned long p = bprm->p;
155 	int argc = bprm->argc;
156 	int envc = bprm->envc;
157 	elf_addr_t __user *argv;
158 	elf_addr_t __user *envp;
159 	elf_addr_t __user *sp;
160 	elf_addr_t __user *u_platform;
161 	elf_addr_t __user *u_base_platform;
162 	elf_addr_t __user *u_rand_bytes;
163 	const char *k_platform = ELF_PLATFORM;
164 	const char *k_base_platform = ELF_BASE_PLATFORM;
165 	unsigned char k_rand_bytes[16];
166 	int items;
167 	elf_addr_t *elf_info;
168 	int ei_index = 0;
169 	const struct cred *cred = current_cred();
170 	struct vm_area_struct *vma;
171 
172 	/*
173 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 	 * evictions by the processes running on the same package. One
175 	 * thing we can do is to shuffle the initial stack for them.
176 	 */
177 
178 	p = arch_align_stack(p);
179 
180 	/*
181 	 * If this architecture has a platform capability string, copy it
182 	 * to userspace.  In some cases (Sparc), this info is impossible
183 	 * for userspace to get any other way, in others (i386) it is
184 	 * merely difficult.
185 	 */
186 	u_platform = NULL;
187 	if (k_platform) {
188 		size_t len = strlen(k_platform) + 1;
189 
190 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 		if (__copy_to_user(u_platform, k_platform, len))
192 			return -EFAULT;
193 	}
194 
195 	/*
196 	 * If this architecture has a "base" platform capability
197 	 * string, copy it to userspace.
198 	 */
199 	u_base_platform = NULL;
200 	if (k_base_platform) {
201 		size_t len = strlen(k_base_platform) + 1;
202 
203 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 		if (__copy_to_user(u_base_platform, k_base_platform, len))
205 			return -EFAULT;
206 	}
207 
208 	/*
209 	 * Generate 16 random bytes for userspace PRNG seeding.
210 	 */
211 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 	u_rand_bytes = (elf_addr_t __user *)
213 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
214 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 		return -EFAULT;
216 
217 	/* Create the ELF interpreter info */
218 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220 #define NEW_AUX_ENT(id, val) \
221 	do { \
222 		elf_info[ei_index++] = id; \
223 		elf_info[ei_index++] = val; \
224 	} while (0)
225 
226 #ifdef ARCH_DLINFO
227 	/*
228 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 	 * AUXV.
230 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 	 * ARCH_DLINFO changes
232 	 */
233 	ARCH_DLINFO;
234 #endif
235 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 	NEW_AUX_ENT(AT_FLAGS, 0);
243 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 #ifdef ELF_HWCAP2
251 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 #endif
253 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 	if (k_platform) {
255 		NEW_AUX_ENT(AT_PLATFORM,
256 			    (elf_addr_t)(unsigned long)u_platform);
257 	}
258 	if (k_base_platform) {
259 		NEW_AUX_ENT(AT_BASE_PLATFORM,
260 			    (elf_addr_t)(unsigned long)u_base_platform);
261 	}
262 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 	}
265 #undef NEW_AUX_ENT
266 	/* AT_NULL is zero; clear the rest too */
267 	memset(&elf_info[ei_index], 0,
268 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269 
270 	/* And advance past the AT_NULL entry.  */
271 	ei_index += 2;
272 
273 	sp = STACK_ADD(p, ei_index);
274 
275 	items = (argc + 1) + (envc + 1) + 1;
276 	bprm->p = STACK_ROUND(sp, items);
277 
278 	/* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 #else
283 	sp = (elf_addr_t __user *)bprm->p;
284 #endif
285 
286 
287 	/*
288 	 * Grow the stack manually; some architectures have a limit on how
289 	 * far ahead a user-space access may be in order to grow the stack.
290 	 */
291 	vma = find_extend_vma(current->mm, bprm->p);
292 	if (!vma)
293 		return -EFAULT;
294 
295 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 	if (__put_user(argc, sp++))
297 		return -EFAULT;
298 	argv = sp;
299 	envp = argv + argc + 1;
300 
301 	/* Populate argv and envp */
302 	p = current->mm->arg_end = current->mm->arg_start;
303 	while (argc-- > 0) {
304 		size_t len;
305 		if (__put_user((elf_addr_t)p, argv++))
306 			return -EFAULT;
307 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 		if (!len || len > MAX_ARG_STRLEN)
309 			return -EINVAL;
310 		p += len;
311 	}
312 	if (__put_user(0, argv))
313 		return -EFAULT;
314 	current->mm->arg_end = current->mm->env_start = p;
315 	while (envc-- > 0) {
316 		size_t len;
317 		if (__put_user((elf_addr_t)p, envp++))
318 			return -EFAULT;
319 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 		if (!len || len > MAX_ARG_STRLEN)
321 			return -EINVAL;
322 		p += len;
323 	}
324 	if (__put_user(0, envp))
325 		return -EFAULT;
326 	current->mm->env_end = p;
327 
328 	/* Put the elf_info on the stack in the right place.  */
329 	sp = (elf_addr_t __user *)envp + 1;
330 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331 		return -EFAULT;
332 	return 0;
333 }
334 
335 #ifndef elf_map
336 
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 		struct elf_phdr *eppnt, int prot, int type,
339 		unsigned long total_size)
340 {
341 	unsigned long map_addr;
342 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 	addr = ELF_PAGESTART(addr);
345 	size = ELF_PAGEALIGN(size);
346 
347 	/* mmap() will return -EINVAL if given a zero size, but a
348 	 * segment with zero filesize is perfectly valid */
349 	if (!size)
350 		return addr;
351 
352 	/*
353 	* total_size is the size of the ELF (interpreter) image.
354 	* The _first_ mmap needs to know the full size, otherwise
355 	* randomization might put this image into an overlapping
356 	* position with the ELF binary image. (since size < total_size)
357 	* So we first map the 'big' image - and unmap the remainder at
358 	* the end. (which unmap is needed for ELF images with holes.)
359 	*/
360 	if (total_size) {
361 		total_size = ELF_PAGEALIGN(total_size);
362 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 		if (!BAD_ADDR(map_addr))
364 			vm_munmap(map_addr+size, total_size-size);
365 	} else
366 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
367 
368 	return(map_addr);
369 }
370 
371 #endif /* !elf_map */
372 
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 {
375 	int i, first_idx = -1, last_idx = -1;
376 
377 	for (i = 0; i < nr; i++) {
378 		if (cmds[i].p_type == PT_LOAD) {
379 			last_idx = i;
380 			if (first_idx == -1)
381 				first_idx = i;
382 		}
383 	}
384 	if (first_idx == -1)
385 		return 0;
386 
387 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
389 }
390 
391 /**
392  * load_elf_phdrs() - load ELF program headers
393  * @elf_ex:   ELF header of the binary whose program headers should be loaded
394  * @elf_file: the opened ELF binary file
395  *
396  * Loads ELF program headers from the binary file elf_file, which has the ELF
397  * header pointed to by elf_ex, into a newly allocated array. The caller is
398  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399  */
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 				       struct file *elf_file)
402 {
403 	struct elf_phdr *elf_phdata = NULL;
404 	int retval, size, err = -1;
405 
406 	/*
407 	 * If the size of this structure has changed, then punt, since
408 	 * we will be doing the wrong thing.
409 	 */
410 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411 		goto out;
412 
413 	/* Sanity check the number of program headers... */
414 	if (elf_ex->e_phnum < 1 ||
415 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416 		goto out;
417 
418 	/* ...and their total size. */
419 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 	if (size > ELF_MIN_ALIGN)
421 		goto out;
422 
423 	elf_phdata = kmalloc(size, GFP_KERNEL);
424 	if (!elf_phdata)
425 		goto out;
426 
427 	/* Read in the program headers */
428 	retval = kernel_read(elf_file, elf_ex->e_phoff,
429 			     (char *)elf_phdata, size);
430 	if (retval != size) {
431 		err = (retval < 0) ? retval : -EIO;
432 		goto out;
433 	}
434 
435 	/* Success! */
436 	err = 0;
437 out:
438 	if (err) {
439 		kfree(elf_phdata);
440 		elf_phdata = NULL;
441 	}
442 	return elf_phdata;
443 }
444 
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
446 
447 /**
448  * struct arch_elf_state - arch-specific ELF loading state
449  *
450  * This structure is used to preserve architecture specific data during
451  * the loading of an ELF file, throughout the checking of architecture
452  * specific ELF headers & through to the point where the ELF load is
453  * known to be proceeding (ie. SET_PERSONALITY).
454  *
455  * This implementation is a dummy for architectures which require no
456  * specific state.
457  */
458 struct arch_elf_state {
459 };
460 
461 #define INIT_ARCH_ELF_STATE {}
462 
463 /**
464  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465  * @ehdr:	The main ELF header
466  * @phdr:	The program header to check
467  * @elf:	The open ELF file
468  * @is_interp:	True if the phdr is from the interpreter of the ELF being
469  *		loaded, else false.
470  * @state:	Architecture-specific state preserved throughout the process
471  *		of loading the ELF.
472  *
473  * Inspects the program header phdr to validate its correctness and/or
474  * suitability for the system. Called once per ELF program header in the
475  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
476  * interpreter.
477  *
478  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479  *         with that return code.
480  */
481 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482 				   struct elf_phdr *phdr,
483 				   struct file *elf, bool is_interp,
484 				   struct arch_elf_state *state)
485 {
486 	/* Dummy implementation, always proceed */
487 	return 0;
488 }
489 
490 /**
491  * arch_check_elf() - check an ELF executable
492  * @ehdr:	The main ELF header
493  * @has_interp:	True if the ELF has an interpreter, else false.
494  * @interp_ehdr: The interpreter's ELF header
495  * @state:	Architecture-specific state preserved throughout the process
496  *		of loading the ELF.
497  *
498  * Provides a final opportunity for architecture code to reject the loading
499  * of the ELF & cause an exec syscall to return an error. This is called after
500  * all program headers to be checked by arch_elf_pt_proc have been.
501  *
502  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
503  *         with that return code.
504  */
505 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
506 				 struct elfhdr *interp_ehdr,
507 				 struct arch_elf_state *state)
508 {
509 	/* Dummy implementation, always proceed */
510 	return 0;
511 }
512 
513 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
514 
515 /* This is much more generalized than the library routine read function,
516    so we keep this separate.  Technically the library read function
517    is only provided so that we can read a.out libraries that have
518    an ELF header */
519 
520 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
521 		struct file *interpreter, unsigned long *interp_map_addr,
522 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
523 {
524 	struct elf_phdr *eppnt;
525 	unsigned long load_addr = 0;
526 	int load_addr_set = 0;
527 	unsigned long last_bss = 0, elf_bss = 0;
528 	unsigned long error = ~0UL;
529 	unsigned long total_size;
530 	int i;
531 
532 	/* First of all, some simple consistency checks */
533 	if (interp_elf_ex->e_type != ET_EXEC &&
534 	    interp_elf_ex->e_type != ET_DYN)
535 		goto out;
536 	if (!elf_check_arch(interp_elf_ex))
537 		goto out;
538 	if (!interpreter->f_op->mmap)
539 		goto out;
540 
541 	total_size = total_mapping_size(interp_elf_phdata,
542 					interp_elf_ex->e_phnum);
543 	if (!total_size) {
544 		error = -EINVAL;
545 		goto out;
546 	}
547 
548 	eppnt = interp_elf_phdata;
549 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
550 		if (eppnt->p_type == PT_LOAD) {
551 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
552 			int elf_prot = 0;
553 			unsigned long vaddr = 0;
554 			unsigned long k, map_addr;
555 
556 			if (eppnt->p_flags & PF_R)
557 		    		elf_prot = PROT_READ;
558 			if (eppnt->p_flags & PF_W)
559 				elf_prot |= PROT_WRITE;
560 			if (eppnt->p_flags & PF_X)
561 				elf_prot |= PROT_EXEC;
562 			vaddr = eppnt->p_vaddr;
563 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
564 				elf_type |= MAP_FIXED;
565 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
566 				load_addr = -vaddr;
567 
568 			map_addr = elf_map(interpreter, load_addr + vaddr,
569 					eppnt, elf_prot, elf_type, total_size);
570 			total_size = 0;
571 			if (!*interp_map_addr)
572 				*interp_map_addr = map_addr;
573 			error = map_addr;
574 			if (BAD_ADDR(map_addr))
575 				goto out;
576 
577 			if (!load_addr_set &&
578 			    interp_elf_ex->e_type == ET_DYN) {
579 				load_addr = map_addr - ELF_PAGESTART(vaddr);
580 				load_addr_set = 1;
581 			}
582 
583 			/*
584 			 * Check to see if the section's size will overflow the
585 			 * allowed task size. Note that p_filesz must always be
586 			 * <= p_memsize so it's only necessary to check p_memsz.
587 			 */
588 			k = load_addr + eppnt->p_vaddr;
589 			if (BAD_ADDR(k) ||
590 			    eppnt->p_filesz > eppnt->p_memsz ||
591 			    eppnt->p_memsz > TASK_SIZE ||
592 			    TASK_SIZE - eppnt->p_memsz < k) {
593 				error = -ENOMEM;
594 				goto out;
595 			}
596 
597 			/*
598 			 * Find the end of the file mapping for this phdr, and
599 			 * keep track of the largest address we see for this.
600 			 */
601 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
602 			if (k > elf_bss)
603 				elf_bss = k;
604 
605 			/*
606 			 * Do the same thing for the memory mapping - between
607 			 * elf_bss and last_bss is the bss section.
608 			 */
609 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
610 			if (k > last_bss)
611 				last_bss = k;
612 		}
613 	}
614 
615 	if (last_bss > elf_bss) {
616 		/*
617 		 * Now fill out the bss section.  First pad the last page up
618 		 * to the page boundary, and then perform a mmap to make sure
619 		 * that there are zero-mapped pages up to and including the
620 		 * last bss page.
621 		 */
622 		if (padzero(elf_bss)) {
623 			error = -EFAULT;
624 			goto out;
625 		}
626 
627 		/* What we have mapped so far */
628 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
629 
630 		/* Map the last of the bss segment */
631 		error = vm_brk(elf_bss, last_bss - elf_bss);
632 		if (BAD_ADDR(error))
633 			goto out;
634 	}
635 
636 	error = load_addr;
637 out:
638 	return error;
639 }
640 
641 /*
642  * These are the functions used to load ELF style executables and shared
643  * libraries.  There is no binary dependent code anywhere else.
644  */
645 
646 #ifndef STACK_RND_MASK
647 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
648 #endif
649 
650 static unsigned long randomize_stack_top(unsigned long stack_top)
651 {
652 	unsigned long random_variable = 0;
653 
654 	if ((current->flags & PF_RANDOMIZE) &&
655 		!(current->personality & ADDR_NO_RANDOMIZE)) {
656 		random_variable = (unsigned long) get_random_int();
657 		random_variable &= STACK_RND_MASK;
658 		random_variable <<= PAGE_SHIFT;
659 	}
660 #ifdef CONFIG_STACK_GROWSUP
661 	return PAGE_ALIGN(stack_top) + random_variable;
662 #else
663 	return PAGE_ALIGN(stack_top) - random_variable;
664 #endif
665 }
666 
667 static int load_elf_binary(struct linux_binprm *bprm)
668 {
669 	struct file *interpreter = NULL; /* to shut gcc up */
670  	unsigned long load_addr = 0, load_bias = 0;
671 	int load_addr_set = 0;
672 	char * elf_interpreter = NULL;
673 	unsigned long error;
674 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
675 	unsigned long elf_bss, elf_brk;
676 	int retval, i;
677 	unsigned long elf_entry;
678 	unsigned long interp_load_addr = 0;
679 	unsigned long start_code, end_code, start_data, end_data;
680 	unsigned long reloc_func_desc __maybe_unused = 0;
681 	int executable_stack = EXSTACK_DEFAULT;
682 	struct pt_regs *regs = current_pt_regs();
683 	struct {
684 		struct elfhdr elf_ex;
685 		struct elfhdr interp_elf_ex;
686 	} *loc;
687 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
688 
689 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
690 	if (!loc) {
691 		retval = -ENOMEM;
692 		goto out_ret;
693 	}
694 
695 	/* Get the exec-header */
696 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
697 
698 	retval = -ENOEXEC;
699 	/* First of all, some simple consistency checks */
700 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
701 		goto out;
702 
703 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
704 		goto out;
705 	if (!elf_check_arch(&loc->elf_ex))
706 		goto out;
707 	if (!bprm->file->f_op->mmap)
708 		goto out;
709 
710 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
711 	if (!elf_phdata)
712 		goto out;
713 
714 	elf_ppnt = elf_phdata;
715 	elf_bss = 0;
716 	elf_brk = 0;
717 
718 	start_code = ~0UL;
719 	end_code = 0;
720 	start_data = 0;
721 	end_data = 0;
722 
723 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
724 		if (elf_ppnt->p_type == PT_INTERP) {
725 			/* This is the program interpreter used for
726 			 * shared libraries - for now assume that this
727 			 * is an a.out format binary
728 			 */
729 			retval = -ENOEXEC;
730 			if (elf_ppnt->p_filesz > PATH_MAX ||
731 			    elf_ppnt->p_filesz < 2)
732 				goto out_free_ph;
733 
734 			retval = -ENOMEM;
735 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
736 						  GFP_KERNEL);
737 			if (!elf_interpreter)
738 				goto out_free_ph;
739 
740 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
741 					     elf_interpreter,
742 					     elf_ppnt->p_filesz);
743 			if (retval != elf_ppnt->p_filesz) {
744 				if (retval >= 0)
745 					retval = -EIO;
746 				goto out_free_interp;
747 			}
748 			/* make sure path is NULL terminated */
749 			retval = -ENOEXEC;
750 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
751 				goto out_free_interp;
752 
753 			interpreter = open_exec(elf_interpreter);
754 			retval = PTR_ERR(interpreter);
755 			if (IS_ERR(interpreter))
756 				goto out_free_interp;
757 
758 			/*
759 			 * If the binary is not readable then enforce
760 			 * mm->dumpable = 0 regardless of the interpreter's
761 			 * permissions.
762 			 */
763 			would_dump(bprm, interpreter);
764 
765 			/* Get the exec headers */
766 			retval = kernel_read(interpreter, 0,
767 					     (void *)&loc->interp_elf_ex,
768 					     sizeof(loc->interp_elf_ex));
769 			if (retval != sizeof(loc->interp_elf_ex)) {
770 				if (retval >= 0)
771 					retval = -EIO;
772 				goto out_free_dentry;
773 			}
774 
775 			break;
776 		}
777 		elf_ppnt++;
778 	}
779 
780 	elf_ppnt = elf_phdata;
781 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
782 		switch (elf_ppnt->p_type) {
783 		case PT_GNU_STACK:
784 			if (elf_ppnt->p_flags & PF_X)
785 				executable_stack = EXSTACK_ENABLE_X;
786 			else
787 				executable_stack = EXSTACK_DISABLE_X;
788 			break;
789 
790 		case PT_LOPROC ... PT_HIPROC:
791 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
792 						  bprm->file, false,
793 						  &arch_state);
794 			if (retval)
795 				goto out_free_dentry;
796 			break;
797 		}
798 
799 	/* Some simple consistency checks for the interpreter */
800 	if (elf_interpreter) {
801 		retval = -ELIBBAD;
802 		/* Not an ELF interpreter */
803 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
804 			goto out_free_dentry;
805 		/* Verify the interpreter has a valid arch */
806 		if (!elf_check_arch(&loc->interp_elf_ex))
807 			goto out_free_dentry;
808 
809 		/* Load the interpreter program headers */
810 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
811 						   interpreter);
812 		if (!interp_elf_phdata)
813 			goto out_free_dentry;
814 
815 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
816 		elf_ppnt = interp_elf_phdata;
817 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
818 			switch (elf_ppnt->p_type) {
819 			case PT_LOPROC ... PT_HIPROC:
820 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
821 							  elf_ppnt, interpreter,
822 							  true, &arch_state);
823 				if (retval)
824 					goto out_free_dentry;
825 				break;
826 			}
827 	}
828 
829 	/*
830 	 * Allow arch code to reject the ELF at this point, whilst it's
831 	 * still possible to return an error to the code that invoked
832 	 * the exec syscall.
833 	 */
834 	retval = arch_check_elf(&loc->elf_ex,
835 				!!interpreter, &loc->interp_elf_ex,
836 				&arch_state);
837 	if (retval)
838 		goto out_free_dentry;
839 
840 	/* Flush all traces of the currently running executable */
841 	retval = flush_old_exec(bprm);
842 	if (retval)
843 		goto out_free_dentry;
844 
845 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
846 	   may depend on the personality.  */
847 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
848 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
849 		current->personality |= READ_IMPLIES_EXEC;
850 
851 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
852 		current->flags |= PF_RANDOMIZE;
853 
854 	setup_new_exec(bprm);
855 
856 	/* Do this so that we can load the interpreter, if need be.  We will
857 	   change some of these later */
858 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
859 				 executable_stack);
860 	if (retval < 0)
861 		goto out_free_dentry;
862 
863 	current->mm->start_stack = bprm->p;
864 
865 	/* Now we do a little grungy work by mmapping the ELF image into
866 	   the correct location in memory. */
867 	for(i = 0, elf_ppnt = elf_phdata;
868 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
869 		int elf_prot = 0, elf_flags;
870 		unsigned long k, vaddr;
871 		unsigned long total_size = 0;
872 
873 		if (elf_ppnt->p_type != PT_LOAD)
874 			continue;
875 
876 		if (unlikely (elf_brk > elf_bss)) {
877 			unsigned long nbyte;
878 
879 			/* There was a PT_LOAD segment with p_memsz > p_filesz
880 			   before this one. Map anonymous pages, if needed,
881 			   and clear the area.  */
882 			retval = set_brk(elf_bss + load_bias,
883 					 elf_brk + load_bias);
884 			if (retval)
885 				goto out_free_dentry;
886 			nbyte = ELF_PAGEOFFSET(elf_bss);
887 			if (nbyte) {
888 				nbyte = ELF_MIN_ALIGN - nbyte;
889 				if (nbyte > elf_brk - elf_bss)
890 					nbyte = elf_brk - elf_bss;
891 				if (clear_user((void __user *)elf_bss +
892 							load_bias, nbyte)) {
893 					/*
894 					 * This bss-zeroing can fail if the ELF
895 					 * file specifies odd protections. So
896 					 * we don't check the return value
897 					 */
898 				}
899 			}
900 		}
901 
902 		if (elf_ppnt->p_flags & PF_R)
903 			elf_prot |= PROT_READ;
904 		if (elf_ppnt->p_flags & PF_W)
905 			elf_prot |= PROT_WRITE;
906 		if (elf_ppnt->p_flags & PF_X)
907 			elf_prot |= PROT_EXEC;
908 
909 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
910 
911 		vaddr = elf_ppnt->p_vaddr;
912 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
913 			elf_flags |= MAP_FIXED;
914 		} else if (loc->elf_ex.e_type == ET_DYN) {
915 			/* Try and get dynamic programs out of the way of the
916 			 * default mmap base, as well as whatever program they
917 			 * might try to exec.  This is because the brk will
918 			 * follow the loader, and is not movable.  */
919 			load_bias = ELF_ET_DYN_BASE - vaddr;
920 			if (current->flags & PF_RANDOMIZE)
921 				load_bias += arch_mmap_rnd();
922 			load_bias = ELF_PAGESTART(load_bias);
923 			total_size = total_mapping_size(elf_phdata,
924 							loc->elf_ex.e_phnum);
925 			if (!total_size) {
926 				retval = -EINVAL;
927 				goto out_free_dentry;
928 			}
929 		}
930 
931 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
932 				elf_prot, elf_flags, total_size);
933 		if (BAD_ADDR(error)) {
934 			retval = IS_ERR((void *)error) ?
935 				PTR_ERR((void*)error) : -EINVAL;
936 			goto out_free_dentry;
937 		}
938 
939 		if (!load_addr_set) {
940 			load_addr_set = 1;
941 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
942 			if (loc->elf_ex.e_type == ET_DYN) {
943 				load_bias += error -
944 				             ELF_PAGESTART(load_bias + vaddr);
945 				load_addr += load_bias;
946 				reloc_func_desc = load_bias;
947 			}
948 		}
949 		k = elf_ppnt->p_vaddr;
950 		if (k < start_code)
951 			start_code = k;
952 		if (start_data < k)
953 			start_data = k;
954 
955 		/*
956 		 * Check to see if the section's size will overflow the
957 		 * allowed task size. Note that p_filesz must always be
958 		 * <= p_memsz so it is only necessary to check p_memsz.
959 		 */
960 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
961 		    elf_ppnt->p_memsz > TASK_SIZE ||
962 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
963 			/* set_brk can never work. Avoid overflows. */
964 			retval = -EINVAL;
965 			goto out_free_dentry;
966 		}
967 
968 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
969 
970 		if (k > elf_bss)
971 			elf_bss = k;
972 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
973 			end_code = k;
974 		if (end_data < k)
975 			end_data = k;
976 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
977 		if (k > elf_brk)
978 			elf_brk = k;
979 	}
980 
981 	loc->elf_ex.e_entry += load_bias;
982 	elf_bss += load_bias;
983 	elf_brk += load_bias;
984 	start_code += load_bias;
985 	end_code += load_bias;
986 	start_data += load_bias;
987 	end_data += load_bias;
988 
989 	/* Calling set_brk effectively mmaps the pages that we need
990 	 * for the bss and break sections.  We must do this before
991 	 * mapping in the interpreter, to make sure it doesn't wind
992 	 * up getting placed where the bss needs to go.
993 	 */
994 	retval = set_brk(elf_bss, elf_brk);
995 	if (retval)
996 		goto out_free_dentry;
997 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
998 		retval = -EFAULT; /* Nobody gets to see this, but.. */
999 		goto out_free_dentry;
1000 	}
1001 
1002 	if (elf_interpreter) {
1003 		unsigned long interp_map_addr = 0;
1004 
1005 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1006 					    interpreter,
1007 					    &interp_map_addr,
1008 					    load_bias, interp_elf_phdata);
1009 		if (!IS_ERR((void *)elf_entry)) {
1010 			/*
1011 			 * load_elf_interp() returns relocation
1012 			 * adjustment
1013 			 */
1014 			interp_load_addr = elf_entry;
1015 			elf_entry += loc->interp_elf_ex.e_entry;
1016 		}
1017 		if (BAD_ADDR(elf_entry)) {
1018 			retval = IS_ERR((void *)elf_entry) ?
1019 					(int)elf_entry : -EINVAL;
1020 			goto out_free_dentry;
1021 		}
1022 		reloc_func_desc = interp_load_addr;
1023 
1024 		allow_write_access(interpreter);
1025 		fput(interpreter);
1026 		kfree(elf_interpreter);
1027 	} else {
1028 		elf_entry = loc->elf_ex.e_entry;
1029 		if (BAD_ADDR(elf_entry)) {
1030 			retval = -EINVAL;
1031 			goto out_free_dentry;
1032 		}
1033 	}
1034 
1035 	kfree(interp_elf_phdata);
1036 	kfree(elf_phdata);
1037 
1038 	set_binfmt(&elf_format);
1039 
1040 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1041 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1042 	if (retval < 0)
1043 		goto out;
1044 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1045 
1046 	install_exec_creds(bprm);
1047 	retval = create_elf_tables(bprm, &loc->elf_ex,
1048 			  load_addr, interp_load_addr);
1049 	if (retval < 0)
1050 		goto out;
1051 	/* N.B. passed_fileno might not be initialized? */
1052 	current->mm->end_code = end_code;
1053 	current->mm->start_code = start_code;
1054 	current->mm->start_data = start_data;
1055 	current->mm->end_data = end_data;
1056 	current->mm->start_stack = bprm->p;
1057 
1058 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1059 		current->mm->brk = current->mm->start_brk =
1060 			arch_randomize_brk(current->mm);
1061 #ifdef compat_brk_randomized
1062 		current->brk_randomized = 1;
1063 #endif
1064 	}
1065 
1066 	if (current->personality & MMAP_PAGE_ZERO) {
1067 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1068 		   and some applications "depend" upon this behavior.
1069 		   Since we do not have the power to recompile these, we
1070 		   emulate the SVr4 behavior. Sigh. */
1071 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1072 				MAP_FIXED | MAP_PRIVATE, 0);
1073 	}
1074 
1075 #ifdef ELF_PLAT_INIT
1076 	/*
1077 	 * The ABI may specify that certain registers be set up in special
1078 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1079 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1080 	 * that the e_entry field is the address of the function descriptor
1081 	 * for the startup routine, rather than the address of the startup
1082 	 * routine itself.  This macro performs whatever initialization to
1083 	 * the regs structure is required as well as any relocations to the
1084 	 * function descriptor entries when executing dynamically links apps.
1085 	 */
1086 	ELF_PLAT_INIT(regs, reloc_func_desc);
1087 #endif
1088 
1089 	start_thread(regs, elf_entry, bprm->p);
1090 	retval = 0;
1091 out:
1092 	kfree(loc);
1093 out_ret:
1094 	return retval;
1095 
1096 	/* error cleanup */
1097 out_free_dentry:
1098 	kfree(interp_elf_phdata);
1099 	allow_write_access(interpreter);
1100 	if (interpreter)
1101 		fput(interpreter);
1102 out_free_interp:
1103 	kfree(elf_interpreter);
1104 out_free_ph:
1105 	kfree(elf_phdata);
1106 	goto out;
1107 }
1108 
1109 #ifdef CONFIG_USELIB
1110 /* This is really simpleminded and specialized - we are loading an
1111    a.out library that is given an ELF header. */
1112 static int load_elf_library(struct file *file)
1113 {
1114 	struct elf_phdr *elf_phdata;
1115 	struct elf_phdr *eppnt;
1116 	unsigned long elf_bss, bss, len;
1117 	int retval, error, i, j;
1118 	struct elfhdr elf_ex;
1119 
1120 	error = -ENOEXEC;
1121 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1122 	if (retval != sizeof(elf_ex))
1123 		goto out;
1124 
1125 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1126 		goto out;
1127 
1128 	/* First of all, some simple consistency checks */
1129 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1130 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1131 		goto out;
1132 
1133 	/* Now read in all of the header information */
1134 
1135 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1136 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1137 
1138 	error = -ENOMEM;
1139 	elf_phdata = kmalloc(j, GFP_KERNEL);
1140 	if (!elf_phdata)
1141 		goto out;
1142 
1143 	eppnt = elf_phdata;
1144 	error = -ENOEXEC;
1145 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1146 	if (retval != j)
1147 		goto out_free_ph;
1148 
1149 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1150 		if ((eppnt + i)->p_type == PT_LOAD)
1151 			j++;
1152 	if (j != 1)
1153 		goto out_free_ph;
1154 
1155 	while (eppnt->p_type != PT_LOAD)
1156 		eppnt++;
1157 
1158 	/* Now use mmap to map the library into memory. */
1159 	error = vm_mmap(file,
1160 			ELF_PAGESTART(eppnt->p_vaddr),
1161 			(eppnt->p_filesz +
1162 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1163 			PROT_READ | PROT_WRITE | PROT_EXEC,
1164 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1165 			(eppnt->p_offset -
1166 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1167 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1168 		goto out_free_ph;
1169 
1170 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1171 	if (padzero(elf_bss)) {
1172 		error = -EFAULT;
1173 		goto out_free_ph;
1174 	}
1175 
1176 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1177 			    ELF_MIN_ALIGN - 1);
1178 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1179 	if (bss > len)
1180 		vm_brk(len, bss - len);
1181 	error = 0;
1182 
1183 out_free_ph:
1184 	kfree(elf_phdata);
1185 out:
1186 	return error;
1187 }
1188 #endif /* #ifdef CONFIG_USELIB */
1189 
1190 #ifdef CONFIG_ELF_CORE
1191 /*
1192  * ELF core dumper
1193  *
1194  * Modelled on fs/exec.c:aout_core_dump()
1195  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1196  */
1197 
1198 /*
1199  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1200  * that are useful for post-mortem analysis are included in every core dump.
1201  * In that way we ensure that the core dump is fully interpretable later
1202  * without matching up the same kernel and hardware config to see what PC values
1203  * meant. These special mappings include - vDSO, vsyscall, and other
1204  * architecture specific mappings
1205  */
1206 static bool always_dump_vma(struct vm_area_struct *vma)
1207 {
1208 	/* Any vsyscall mappings? */
1209 	if (vma == get_gate_vma(vma->vm_mm))
1210 		return true;
1211 
1212 	/*
1213 	 * Assume that all vmas with a .name op should always be dumped.
1214 	 * If this changes, a new vm_ops field can easily be added.
1215 	 */
1216 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1217 		return true;
1218 
1219 	/*
1220 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1221 	 * such as vDSO sections.
1222 	 */
1223 	if (arch_vma_name(vma))
1224 		return true;
1225 
1226 	return false;
1227 }
1228 
1229 /*
1230  * Decide what to dump of a segment, part, all or none.
1231  */
1232 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1233 				   unsigned long mm_flags)
1234 {
1235 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1236 
1237 	/* always dump the vdso and vsyscall sections */
1238 	if (always_dump_vma(vma))
1239 		goto whole;
1240 
1241 	if (vma->vm_flags & VM_DONTDUMP)
1242 		return 0;
1243 
1244 	/* support for DAX */
1245 	if (vma_is_dax(vma)) {
1246 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1247 			goto whole;
1248 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1249 			goto whole;
1250 		return 0;
1251 	}
1252 
1253 	/* Hugetlb memory check */
1254 	if (vma->vm_flags & VM_HUGETLB) {
1255 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1256 			goto whole;
1257 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1258 			goto whole;
1259 		return 0;
1260 	}
1261 
1262 	/* Do not dump I/O mapped devices or special mappings */
1263 	if (vma->vm_flags & VM_IO)
1264 		return 0;
1265 
1266 	/* By default, dump shared memory if mapped from an anonymous file. */
1267 	if (vma->vm_flags & VM_SHARED) {
1268 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1269 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1270 			goto whole;
1271 		return 0;
1272 	}
1273 
1274 	/* Dump segments that have been written to.  */
1275 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1276 		goto whole;
1277 	if (vma->vm_file == NULL)
1278 		return 0;
1279 
1280 	if (FILTER(MAPPED_PRIVATE))
1281 		goto whole;
1282 
1283 	/*
1284 	 * If this looks like the beginning of a DSO or executable mapping,
1285 	 * check for an ELF header.  If we find one, dump the first page to
1286 	 * aid in determining what was mapped here.
1287 	 */
1288 	if (FILTER(ELF_HEADERS) &&
1289 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1290 		u32 __user *header = (u32 __user *) vma->vm_start;
1291 		u32 word;
1292 		mm_segment_t fs = get_fs();
1293 		/*
1294 		 * Doing it this way gets the constant folded by GCC.
1295 		 */
1296 		union {
1297 			u32 cmp;
1298 			char elfmag[SELFMAG];
1299 		} magic;
1300 		BUILD_BUG_ON(SELFMAG != sizeof word);
1301 		magic.elfmag[EI_MAG0] = ELFMAG0;
1302 		magic.elfmag[EI_MAG1] = ELFMAG1;
1303 		magic.elfmag[EI_MAG2] = ELFMAG2;
1304 		magic.elfmag[EI_MAG3] = ELFMAG3;
1305 		/*
1306 		 * Switch to the user "segment" for get_user(),
1307 		 * then put back what elf_core_dump() had in place.
1308 		 */
1309 		set_fs(USER_DS);
1310 		if (unlikely(get_user(word, header)))
1311 			word = 0;
1312 		set_fs(fs);
1313 		if (word == magic.cmp)
1314 			return PAGE_SIZE;
1315 	}
1316 
1317 #undef	FILTER
1318 
1319 	return 0;
1320 
1321 whole:
1322 	return vma->vm_end - vma->vm_start;
1323 }
1324 
1325 /* An ELF note in memory */
1326 struct memelfnote
1327 {
1328 	const char *name;
1329 	int type;
1330 	unsigned int datasz;
1331 	void *data;
1332 };
1333 
1334 static int notesize(struct memelfnote *en)
1335 {
1336 	int sz;
1337 
1338 	sz = sizeof(struct elf_note);
1339 	sz += roundup(strlen(en->name) + 1, 4);
1340 	sz += roundup(en->datasz, 4);
1341 
1342 	return sz;
1343 }
1344 
1345 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1346 {
1347 	struct elf_note en;
1348 	en.n_namesz = strlen(men->name) + 1;
1349 	en.n_descsz = men->datasz;
1350 	en.n_type = men->type;
1351 
1352 	return dump_emit(cprm, &en, sizeof(en)) &&
1353 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1354 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1355 }
1356 
1357 static void fill_elf_header(struct elfhdr *elf, int segs,
1358 			    u16 machine, u32 flags)
1359 {
1360 	memset(elf, 0, sizeof(*elf));
1361 
1362 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1363 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1364 	elf->e_ident[EI_DATA] = ELF_DATA;
1365 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1366 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1367 
1368 	elf->e_type = ET_CORE;
1369 	elf->e_machine = machine;
1370 	elf->e_version = EV_CURRENT;
1371 	elf->e_phoff = sizeof(struct elfhdr);
1372 	elf->e_flags = flags;
1373 	elf->e_ehsize = sizeof(struct elfhdr);
1374 	elf->e_phentsize = sizeof(struct elf_phdr);
1375 	elf->e_phnum = segs;
1376 
1377 	return;
1378 }
1379 
1380 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1381 {
1382 	phdr->p_type = PT_NOTE;
1383 	phdr->p_offset = offset;
1384 	phdr->p_vaddr = 0;
1385 	phdr->p_paddr = 0;
1386 	phdr->p_filesz = sz;
1387 	phdr->p_memsz = 0;
1388 	phdr->p_flags = 0;
1389 	phdr->p_align = 0;
1390 	return;
1391 }
1392 
1393 static void fill_note(struct memelfnote *note, const char *name, int type,
1394 		unsigned int sz, void *data)
1395 {
1396 	note->name = name;
1397 	note->type = type;
1398 	note->datasz = sz;
1399 	note->data = data;
1400 	return;
1401 }
1402 
1403 /*
1404  * fill up all the fields in prstatus from the given task struct, except
1405  * registers which need to be filled up separately.
1406  */
1407 static void fill_prstatus(struct elf_prstatus *prstatus,
1408 		struct task_struct *p, long signr)
1409 {
1410 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1411 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1412 	prstatus->pr_sighold = p->blocked.sig[0];
1413 	rcu_read_lock();
1414 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1415 	rcu_read_unlock();
1416 	prstatus->pr_pid = task_pid_vnr(p);
1417 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1418 	prstatus->pr_sid = task_session_vnr(p);
1419 	if (thread_group_leader(p)) {
1420 		struct task_cputime cputime;
1421 
1422 		/*
1423 		 * This is the record for the group leader.  It shows the
1424 		 * group-wide total, not its individual thread total.
1425 		 */
1426 		thread_group_cputime(p, &cputime);
1427 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1428 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1429 	} else {
1430 		cputime_t utime, stime;
1431 
1432 		task_cputime(p, &utime, &stime);
1433 		cputime_to_timeval(utime, &prstatus->pr_utime);
1434 		cputime_to_timeval(stime, &prstatus->pr_stime);
1435 	}
1436 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1437 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1438 }
1439 
1440 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1441 		       struct mm_struct *mm)
1442 {
1443 	const struct cred *cred;
1444 	unsigned int i, len;
1445 
1446 	/* first copy the parameters from user space */
1447 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1448 
1449 	len = mm->arg_end - mm->arg_start;
1450 	if (len >= ELF_PRARGSZ)
1451 		len = ELF_PRARGSZ-1;
1452 	if (copy_from_user(&psinfo->pr_psargs,
1453 		           (const char __user *)mm->arg_start, len))
1454 		return -EFAULT;
1455 	for(i = 0; i < len; i++)
1456 		if (psinfo->pr_psargs[i] == 0)
1457 			psinfo->pr_psargs[i] = ' ';
1458 	psinfo->pr_psargs[len] = 0;
1459 
1460 	rcu_read_lock();
1461 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1462 	rcu_read_unlock();
1463 	psinfo->pr_pid = task_pid_vnr(p);
1464 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1465 	psinfo->pr_sid = task_session_vnr(p);
1466 
1467 	i = p->state ? ffz(~p->state) + 1 : 0;
1468 	psinfo->pr_state = i;
1469 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1470 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1471 	psinfo->pr_nice = task_nice(p);
1472 	psinfo->pr_flag = p->flags;
1473 	rcu_read_lock();
1474 	cred = __task_cred(p);
1475 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1476 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1477 	rcu_read_unlock();
1478 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1479 
1480 	return 0;
1481 }
1482 
1483 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1484 {
1485 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1486 	int i = 0;
1487 	do
1488 		i += 2;
1489 	while (auxv[i - 2] != AT_NULL);
1490 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1491 }
1492 
1493 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1494 		const siginfo_t *siginfo)
1495 {
1496 	mm_segment_t old_fs = get_fs();
1497 	set_fs(KERNEL_DS);
1498 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1499 	set_fs(old_fs);
1500 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1501 }
1502 
1503 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1504 /*
1505  * Format of NT_FILE note:
1506  *
1507  * long count     -- how many files are mapped
1508  * long page_size -- units for file_ofs
1509  * array of [COUNT] elements of
1510  *   long start
1511  *   long end
1512  *   long file_ofs
1513  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1514  */
1515 static int fill_files_note(struct memelfnote *note)
1516 {
1517 	struct vm_area_struct *vma;
1518 	unsigned count, size, names_ofs, remaining, n;
1519 	user_long_t *data;
1520 	user_long_t *start_end_ofs;
1521 	char *name_base, *name_curpos;
1522 
1523 	/* *Estimated* file count and total data size needed */
1524 	count = current->mm->map_count;
1525 	size = count * 64;
1526 
1527 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1528  alloc:
1529 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1530 		return -EINVAL;
1531 	size = round_up(size, PAGE_SIZE);
1532 	data = vmalloc(size);
1533 	if (!data)
1534 		return -ENOMEM;
1535 
1536 	start_end_ofs = data + 2;
1537 	name_base = name_curpos = ((char *)data) + names_ofs;
1538 	remaining = size - names_ofs;
1539 	count = 0;
1540 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1541 		struct file *file;
1542 		const char *filename;
1543 
1544 		file = vma->vm_file;
1545 		if (!file)
1546 			continue;
1547 		filename = file_path(file, name_curpos, remaining);
1548 		if (IS_ERR(filename)) {
1549 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1550 				vfree(data);
1551 				size = size * 5 / 4;
1552 				goto alloc;
1553 			}
1554 			continue;
1555 		}
1556 
1557 		/* file_path() fills at the end, move name down */
1558 		/* n = strlen(filename) + 1: */
1559 		n = (name_curpos + remaining) - filename;
1560 		remaining = filename - name_curpos;
1561 		memmove(name_curpos, filename, n);
1562 		name_curpos += n;
1563 
1564 		*start_end_ofs++ = vma->vm_start;
1565 		*start_end_ofs++ = vma->vm_end;
1566 		*start_end_ofs++ = vma->vm_pgoff;
1567 		count++;
1568 	}
1569 
1570 	/* Now we know exact count of files, can store it */
1571 	data[0] = count;
1572 	data[1] = PAGE_SIZE;
1573 	/*
1574 	 * Count usually is less than current->mm->map_count,
1575 	 * we need to move filenames down.
1576 	 */
1577 	n = current->mm->map_count - count;
1578 	if (n != 0) {
1579 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1580 		memmove(name_base - shift_bytes, name_base,
1581 			name_curpos - name_base);
1582 		name_curpos -= shift_bytes;
1583 	}
1584 
1585 	size = name_curpos - (char *)data;
1586 	fill_note(note, "CORE", NT_FILE, size, data);
1587 	return 0;
1588 }
1589 
1590 #ifdef CORE_DUMP_USE_REGSET
1591 #include <linux/regset.h>
1592 
1593 struct elf_thread_core_info {
1594 	struct elf_thread_core_info *next;
1595 	struct task_struct *task;
1596 	struct elf_prstatus prstatus;
1597 	struct memelfnote notes[0];
1598 };
1599 
1600 struct elf_note_info {
1601 	struct elf_thread_core_info *thread;
1602 	struct memelfnote psinfo;
1603 	struct memelfnote signote;
1604 	struct memelfnote auxv;
1605 	struct memelfnote files;
1606 	user_siginfo_t csigdata;
1607 	size_t size;
1608 	int thread_notes;
1609 };
1610 
1611 /*
1612  * When a regset has a writeback hook, we call it on each thread before
1613  * dumping user memory.  On register window machines, this makes sure the
1614  * user memory backing the register data is up to date before we read it.
1615  */
1616 static void do_thread_regset_writeback(struct task_struct *task,
1617 				       const struct user_regset *regset)
1618 {
1619 	if (regset->writeback)
1620 		regset->writeback(task, regset, 1);
1621 }
1622 
1623 #ifndef PR_REG_SIZE
1624 #define PR_REG_SIZE(S) sizeof(S)
1625 #endif
1626 
1627 #ifndef PRSTATUS_SIZE
1628 #define PRSTATUS_SIZE(S) sizeof(S)
1629 #endif
1630 
1631 #ifndef PR_REG_PTR
1632 #define PR_REG_PTR(S) (&((S)->pr_reg))
1633 #endif
1634 
1635 #ifndef SET_PR_FPVALID
1636 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1637 #endif
1638 
1639 static int fill_thread_core_info(struct elf_thread_core_info *t,
1640 				 const struct user_regset_view *view,
1641 				 long signr, size_t *total)
1642 {
1643 	unsigned int i;
1644 
1645 	/*
1646 	 * NT_PRSTATUS is the one special case, because the regset data
1647 	 * goes into the pr_reg field inside the note contents, rather
1648 	 * than being the whole note contents.  We fill the reset in here.
1649 	 * We assume that regset 0 is NT_PRSTATUS.
1650 	 */
1651 	fill_prstatus(&t->prstatus, t->task, signr);
1652 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1653 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1654 				    PR_REG_PTR(&t->prstatus), NULL);
1655 
1656 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1657 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1658 	*total += notesize(&t->notes[0]);
1659 
1660 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1661 
1662 	/*
1663 	 * Each other regset might generate a note too.  For each regset
1664 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1665 	 * all zero and we'll know to skip writing it later.
1666 	 */
1667 	for (i = 1; i < view->n; ++i) {
1668 		const struct user_regset *regset = &view->regsets[i];
1669 		do_thread_regset_writeback(t->task, regset);
1670 		if (regset->core_note_type && regset->get &&
1671 		    (!regset->active || regset->active(t->task, regset))) {
1672 			int ret;
1673 			size_t size = regset->n * regset->size;
1674 			void *data = kmalloc(size, GFP_KERNEL);
1675 			if (unlikely(!data))
1676 				return 0;
1677 			ret = regset->get(t->task, regset,
1678 					  0, size, data, NULL);
1679 			if (unlikely(ret))
1680 				kfree(data);
1681 			else {
1682 				if (regset->core_note_type != NT_PRFPREG)
1683 					fill_note(&t->notes[i], "LINUX",
1684 						  regset->core_note_type,
1685 						  size, data);
1686 				else {
1687 					SET_PR_FPVALID(&t->prstatus, 1);
1688 					fill_note(&t->notes[i], "CORE",
1689 						  NT_PRFPREG, size, data);
1690 				}
1691 				*total += notesize(&t->notes[i]);
1692 			}
1693 		}
1694 	}
1695 
1696 	return 1;
1697 }
1698 
1699 static int fill_note_info(struct elfhdr *elf, int phdrs,
1700 			  struct elf_note_info *info,
1701 			  const siginfo_t *siginfo, struct pt_regs *regs)
1702 {
1703 	struct task_struct *dump_task = current;
1704 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1705 	struct elf_thread_core_info *t;
1706 	struct elf_prpsinfo *psinfo;
1707 	struct core_thread *ct;
1708 	unsigned int i;
1709 
1710 	info->size = 0;
1711 	info->thread = NULL;
1712 
1713 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1714 	if (psinfo == NULL) {
1715 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1716 		return 0;
1717 	}
1718 
1719 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1720 
1721 	/*
1722 	 * Figure out how many notes we're going to need for each thread.
1723 	 */
1724 	info->thread_notes = 0;
1725 	for (i = 0; i < view->n; ++i)
1726 		if (view->regsets[i].core_note_type != 0)
1727 			++info->thread_notes;
1728 
1729 	/*
1730 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1731 	 * since it is our one special case.
1732 	 */
1733 	if (unlikely(info->thread_notes == 0) ||
1734 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1735 		WARN_ON(1);
1736 		return 0;
1737 	}
1738 
1739 	/*
1740 	 * Initialize the ELF file header.
1741 	 */
1742 	fill_elf_header(elf, phdrs,
1743 			view->e_machine, view->e_flags);
1744 
1745 	/*
1746 	 * Allocate a structure for each thread.
1747 	 */
1748 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1749 		t = kzalloc(offsetof(struct elf_thread_core_info,
1750 				     notes[info->thread_notes]),
1751 			    GFP_KERNEL);
1752 		if (unlikely(!t))
1753 			return 0;
1754 
1755 		t->task = ct->task;
1756 		if (ct->task == dump_task || !info->thread) {
1757 			t->next = info->thread;
1758 			info->thread = t;
1759 		} else {
1760 			/*
1761 			 * Make sure to keep the original task at
1762 			 * the head of the list.
1763 			 */
1764 			t->next = info->thread->next;
1765 			info->thread->next = t;
1766 		}
1767 	}
1768 
1769 	/*
1770 	 * Now fill in each thread's information.
1771 	 */
1772 	for (t = info->thread; t != NULL; t = t->next)
1773 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1774 			return 0;
1775 
1776 	/*
1777 	 * Fill in the two process-wide notes.
1778 	 */
1779 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1780 	info->size += notesize(&info->psinfo);
1781 
1782 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1783 	info->size += notesize(&info->signote);
1784 
1785 	fill_auxv_note(&info->auxv, current->mm);
1786 	info->size += notesize(&info->auxv);
1787 
1788 	if (fill_files_note(&info->files) == 0)
1789 		info->size += notesize(&info->files);
1790 
1791 	return 1;
1792 }
1793 
1794 static size_t get_note_info_size(struct elf_note_info *info)
1795 {
1796 	return info->size;
1797 }
1798 
1799 /*
1800  * Write all the notes for each thread.  When writing the first thread, the
1801  * process-wide notes are interleaved after the first thread-specific note.
1802  */
1803 static int write_note_info(struct elf_note_info *info,
1804 			   struct coredump_params *cprm)
1805 {
1806 	bool first = true;
1807 	struct elf_thread_core_info *t = info->thread;
1808 
1809 	do {
1810 		int i;
1811 
1812 		if (!writenote(&t->notes[0], cprm))
1813 			return 0;
1814 
1815 		if (first && !writenote(&info->psinfo, cprm))
1816 			return 0;
1817 		if (first && !writenote(&info->signote, cprm))
1818 			return 0;
1819 		if (first && !writenote(&info->auxv, cprm))
1820 			return 0;
1821 		if (first && info->files.data &&
1822 				!writenote(&info->files, cprm))
1823 			return 0;
1824 
1825 		for (i = 1; i < info->thread_notes; ++i)
1826 			if (t->notes[i].data &&
1827 			    !writenote(&t->notes[i], cprm))
1828 				return 0;
1829 
1830 		first = false;
1831 		t = t->next;
1832 	} while (t);
1833 
1834 	return 1;
1835 }
1836 
1837 static void free_note_info(struct elf_note_info *info)
1838 {
1839 	struct elf_thread_core_info *threads = info->thread;
1840 	while (threads) {
1841 		unsigned int i;
1842 		struct elf_thread_core_info *t = threads;
1843 		threads = t->next;
1844 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1845 		for (i = 1; i < info->thread_notes; ++i)
1846 			kfree(t->notes[i].data);
1847 		kfree(t);
1848 	}
1849 	kfree(info->psinfo.data);
1850 	vfree(info->files.data);
1851 }
1852 
1853 #else
1854 
1855 /* Here is the structure in which status of each thread is captured. */
1856 struct elf_thread_status
1857 {
1858 	struct list_head list;
1859 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1860 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1861 	struct task_struct *thread;
1862 #ifdef ELF_CORE_COPY_XFPREGS
1863 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1864 #endif
1865 	struct memelfnote notes[3];
1866 	int num_notes;
1867 };
1868 
1869 /*
1870  * In order to add the specific thread information for the elf file format,
1871  * we need to keep a linked list of every threads pr_status and then create
1872  * a single section for them in the final core file.
1873  */
1874 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1875 {
1876 	int sz = 0;
1877 	struct task_struct *p = t->thread;
1878 	t->num_notes = 0;
1879 
1880 	fill_prstatus(&t->prstatus, p, signr);
1881 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1882 
1883 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1884 		  &(t->prstatus));
1885 	t->num_notes++;
1886 	sz += notesize(&t->notes[0]);
1887 
1888 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1889 								&t->fpu))) {
1890 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1891 			  &(t->fpu));
1892 		t->num_notes++;
1893 		sz += notesize(&t->notes[1]);
1894 	}
1895 
1896 #ifdef ELF_CORE_COPY_XFPREGS
1897 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1898 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1899 			  sizeof(t->xfpu), &t->xfpu);
1900 		t->num_notes++;
1901 		sz += notesize(&t->notes[2]);
1902 	}
1903 #endif
1904 	return sz;
1905 }
1906 
1907 struct elf_note_info {
1908 	struct memelfnote *notes;
1909 	struct memelfnote *notes_files;
1910 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1911 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1912 	struct list_head thread_list;
1913 	elf_fpregset_t *fpu;
1914 #ifdef ELF_CORE_COPY_XFPREGS
1915 	elf_fpxregset_t *xfpu;
1916 #endif
1917 	user_siginfo_t csigdata;
1918 	int thread_status_size;
1919 	int numnote;
1920 };
1921 
1922 static int elf_note_info_init(struct elf_note_info *info)
1923 {
1924 	memset(info, 0, sizeof(*info));
1925 	INIT_LIST_HEAD(&info->thread_list);
1926 
1927 	/* Allocate space for ELF notes */
1928 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1929 	if (!info->notes)
1930 		return 0;
1931 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1932 	if (!info->psinfo)
1933 		return 0;
1934 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1935 	if (!info->prstatus)
1936 		return 0;
1937 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1938 	if (!info->fpu)
1939 		return 0;
1940 #ifdef ELF_CORE_COPY_XFPREGS
1941 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1942 	if (!info->xfpu)
1943 		return 0;
1944 #endif
1945 	return 1;
1946 }
1947 
1948 static int fill_note_info(struct elfhdr *elf, int phdrs,
1949 			  struct elf_note_info *info,
1950 			  const siginfo_t *siginfo, struct pt_regs *regs)
1951 {
1952 	struct list_head *t;
1953 	struct core_thread *ct;
1954 	struct elf_thread_status *ets;
1955 
1956 	if (!elf_note_info_init(info))
1957 		return 0;
1958 
1959 	for (ct = current->mm->core_state->dumper.next;
1960 					ct; ct = ct->next) {
1961 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1962 		if (!ets)
1963 			return 0;
1964 
1965 		ets->thread = ct->task;
1966 		list_add(&ets->list, &info->thread_list);
1967 	}
1968 
1969 	list_for_each(t, &info->thread_list) {
1970 		int sz;
1971 
1972 		ets = list_entry(t, struct elf_thread_status, list);
1973 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1974 		info->thread_status_size += sz;
1975 	}
1976 	/* now collect the dump for the current */
1977 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1978 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1979 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1980 
1981 	/* Set up header */
1982 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1983 
1984 	/*
1985 	 * Set up the notes in similar form to SVR4 core dumps made
1986 	 * with info from their /proc.
1987 	 */
1988 
1989 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1990 		  sizeof(*info->prstatus), info->prstatus);
1991 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1992 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1993 		  sizeof(*info->psinfo), info->psinfo);
1994 
1995 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1996 	fill_auxv_note(info->notes + 3, current->mm);
1997 	info->numnote = 4;
1998 
1999 	if (fill_files_note(info->notes + info->numnote) == 0) {
2000 		info->notes_files = info->notes + info->numnote;
2001 		info->numnote++;
2002 	}
2003 
2004 	/* Try to dump the FPU. */
2005 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2006 							       info->fpu);
2007 	if (info->prstatus->pr_fpvalid)
2008 		fill_note(info->notes + info->numnote++,
2009 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2010 #ifdef ELF_CORE_COPY_XFPREGS
2011 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2012 		fill_note(info->notes + info->numnote++,
2013 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2014 			  sizeof(*info->xfpu), info->xfpu);
2015 #endif
2016 
2017 	return 1;
2018 }
2019 
2020 static size_t get_note_info_size(struct elf_note_info *info)
2021 {
2022 	int sz = 0;
2023 	int i;
2024 
2025 	for (i = 0; i < info->numnote; i++)
2026 		sz += notesize(info->notes + i);
2027 
2028 	sz += info->thread_status_size;
2029 
2030 	return sz;
2031 }
2032 
2033 static int write_note_info(struct elf_note_info *info,
2034 			   struct coredump_params *cprm)
2035 {
2036 	int i;
2037 	struct list_head *t;
2038 
2039 	for (i = 0; i < info->numnote; i++)
2040 		if (!writenote(info->notes + i, cprm))
2041 			return 0;
2042 
2043 	/* write out the thread status notes section */
2044 	list_for_each(t, &info->thread_list) {
2045 		struct elf_thread_status *tmp =
2046 				list_entry(t, struct elf_thread_status, list);
2047 
2048 		for (i = 0; i < tmp->num_notes; i++)
2049 			if (!writenote(&tmp->notes[i], cprm))
2050 				return 0;
2051 	}
2052 
2053 	return 1;
2054 }
2055 
2056 static void free_note_info(struct elf_note_info *info)
2057 {
2058 	while (!list_empty(&info->thread_list)) {
2059 		struct list_head *tmp = info->thread_list.next;
2060 		list_del(tmp);
2061 		kfree(list_entry(tmp, struct elf_thread_status, list));
2062 	}
2063 
2064 	/* Free data possibly allocated by fill_files_note(): */
2065 	if (info->notes_files)
2066 		vfree(info->notes_files->data);
2067 
2068 	kfree(info->prstatus);
2069 	kfree(info->psinfo);
2070 	kfree(info->notes);
2071 	kfree(info->fpu);
2072 #ifdef ELF_CORE_COPY_XFPREGS
2073 	kfree(info->xfpu);
2074 #endif
2075 }
2076 
2077 #endif
2078 
2079 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2080 					struct vm_area_struct *gate_vma)
2081 {
2082 	struct vm_area_struct *ret = tsk->mm->mmap;
2083 
2084 	if (ret)
2085 		return ret;
2086 	return gate_vma;
2087 }
2088 /*
2089  * Helper function for iterating across a vma list.  It ensures that the caller
2090  * will visit `gate_vma' prior to terminating the search.
2091  */
2092 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2093 					struct vm_area_struct *gate_vma)
2094 {
2095 	struct vm_area_struct *ret;
2096 
2097 	ret = this_vma->vm_next;
2098 	if (ret)
2099 		return ret;
2100 	if (this_vma == gate_vma)
2101 		return NULL;
2102 	return gate_vma;
2103 }
2104 
2105 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2106 			     elf_addr_t e_shoff, int segs)
2107 {
2108 	elf->e_shoff = e_shoff;
2109 	elf->e_shentsize = sizeof(*shdr4extnum);
2110 	elf->e_shnum = 1;
2111 	elf->e_shstrndx = SHN_UNDEF;
2112 
2113 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2114 
2115 	shdr4extnum->sh_type = SHT_NULL;
2116 	shdr4extnum->sh_size = elf->e_shnum;
2117 	shdr4extnum->sh_link = elf->e_shstrndx;
2118 	shdr4extnum->sh_info = segs;
2119 }
2120 
2121 /*
2122  * Actual dumper
2123  *
2124  * This is a two-pass process; first we find the offsets of the bits,
2125  * and then they are actually written out.  If we run out of core limit
2126  * we just truncate.
2127  */
2128 static int elf_core_dump(struct coredump_params *cprm)
2129 {
2130 	int has_dumped = 0;
2131 	mm_segment_t fs;
2132 	int segs, i;
2133 	size_t vma_data_size = 0;
2134 	struct vm_area_struct *vma, *gate_vma;
2135 	struct elfhdr *elf = NULL;
2136 	loff_t offset = 0, dataoff;
2137 	struct elf_note_info info = { };
2138 	struct elf_phdr *phdr4note = NULL;
2139 	struct elf_shdr *shdr4extnum = NULL;
2140 	Elf_Half e_phnum;
2141 	elf_addr_t e_shoff;
2142 	elf_addr_t *vma_filesz = NULL;
2143 
2144 	/*
2145 	 * We no longer stop all VM operations.
2146 	 *
2147 	 * This is because those proceses that could possibly change map_count
2148 	 * or the mmap / vma pages are now blocked in do_exit on current
2149 	 * finishing this core dump.
2150 	 *
2151 	 * Only ptrace can touch these memory addresses, but it doesn't change
2152 	 * the map_count or the pages allocated. So no possibility of crashing
2153 	 * exists while dumping the mm->vm_next areas to the core file.
2154 	 */
2155 
2156 	/* alloc memory for large data structures: too large to be on stack */
2157 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2158 	if (!elf)
2159 		goto out;
2160 	/*
2161 	 * The number of segs are recored into ELF header as 16bit value.
2162 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2163 	 */
2164 	segs = current->mm->map_count;
2165 	segs += elf_core_extra_phdrs();
2166 
2167 	gate_vma = get_gate_vma(current->mm);
2168 	if (gate_vma != NULL)
2169 		segs++;
2170 
2171 	/* for notes section */
2172 	segs++;
2173 
2174 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2175 	 * this, kernel supports extended numbering. Have a look at
2176 	 * include/linux/elf.h for further information. */
2177 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2178 
2179 	/*
2180 	 * Collect all the non-memory information about the process for the
2181 	 * notes.  This also sets up the file header.
2182 	 */
2183 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2184 		goto cleanup;
2185 
2186 	has_dumped = 1;
2187 
2188 	fs = get_fs();
2189 	set_fs(KERNEL_DS);
2190 
2191 	offset += sizeof(*elf);				/* Elf header */
2192 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2193 
2194 	/* Write notes phdr entry */
2195 	{
2196 		size_t sz = get_note_info_size(&info);
2197 
2198 		sz += elf_coredump_extra_notes_size();
2199 
2200 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2201 		if (!phdr4note)
2202 			goto end_coredump;
2203 
2204 		fill_elf_note_phdr(phdr4note, sz, offset);
2205 		offset += sz;
2206 	}
2207 
2208 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2209 
2210 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2211 	if (!vma_filesz)
2212 		goto end_coredump;
2213 
2214 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2215 			vma = next_vma(vma, gate_vma)) {
2216 		unsigned long dump_size;
2217 
2218 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2219 		vma_filesz[i++] = dump_size;
2220 		vma_data_size += dump_size;
2221 	}
2222 
2223 	offset += vma_data_size;
2224 	offset += elf_core_extra_data_size();
2225 	e_shoff = offset;
2226 
2227 	if (e_phnum == PN_XNUM) {
2228 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2229 		if (!shdr4extnum)
2230 			goto end_coredump;
2231 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2232 	}
2233 
2234 	offset = dataoff;
2235 
2236 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2237 		goto end_coredump;
2238 
2239 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2240 		goto end_coredump;
2241 
2242 	/* Write program headers for segments dump */
2243 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2244 			vma = next_vma(vma, gate_vma)) {
2245 		struct elf_phdr phdr;
2246 
2247 		phdr.p_type = PT_LOAD;
2248 		phdr.p_offset = offset;
2249 		phdr.p_vaddr = vma->vm_start;
2250 		phdr.p_paddr = 0;
2251 		phdr.p_filesz = vma_filesz[i++];
2252 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2253 		offset += phdr.p_filesz;
2254 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2255 		if (vma->vm_flags & VM_WRITE)
2256 			phdr.p_flags |= PF_W;
2257 		if (vma->vm_flags & VM_EXEC)
2258 			phdr.p_flags |= PF_X;
2259 		phdr.p_align = ELF_EXEC_PAGESIZE;
2260 
2261 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2262 			goto end_coredump;
2263 	}
2264 
2265 	if (!elf_core_write_extra_phdrs(cprm, offset))
2266 		goto end_coredump;
2267 
2268  	/* write out the notes section */
2269 	if (!write_note_info(&info, cprm))
2270 		goto end_coredump;
2271 
2272 	if (elf_coredump_extra_notes_write(cprm))
2273 		goto end_coredump;
2274 
2275 	/* Align to page */
2276 	if (!dump_skip(cprm, dataoff - cprm->written))
2277 		goto end_coredump;
2278 
2279 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2280 			vma = next_vma(vma, gate_vma)) {
2281 		unsigned long addr;
2282 		unsigned long end;
2283 
2284 		end = vma->vm_start + vma_filesz[i++];
2285 
2286 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2287 			struct page *page;
2288 			int stop;
2289 
2290 			page = get_dump_page(addr);
2291 			if (page) {
2292 				void *kaddr = kmap(page);
2293 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2294 				kunmap(page);
2295 				page_cache_release(page);
2296 			} else
2297 				stop = !dump_skip(cprm, PAGE_SIZE);
2298 			if (stop)
2299 				goto end_coredump;
2300 		}
2301 	}
2302 
2303 	if (!elf_core_write_extra_data(cprm))
2304 		goto end_coredump;
2305 
2306 	if (e_phnum == PN_XNUM) {
2307 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2308 			goto end_coredump;
2309 	}
2310 
2311 end_coredump:
2312 	set_fs(fs);
2313 
2314 cleanup:
2315 	free_note_info(&info);
2316 	kfree(shdr4extnum);
2317 	kfree(vma_filesz);
2318 	kfree(phdr4note);
2319 	kfree(elf);
2320 out:
2321 	return has_dumped;
2322 }
2323 
2324 #endif		/* CONFIG_ELF_CORE */
2325 
2326 static int __init init_elf_binfmt(void)
2327 {
2328 	register_binfmt(&elf_format);
2329 	return 0;
2330 }
2331 
2332 static void __exit exit_elf_binfmt(void)
2333 {
2334 	/* Remove the COFF and ELF loaders. */
2335 	unregister_binfmt(&elf_format);
2336 }
2337 
2338 core_initcall(init_elf_binfmt);
2339 module_exit(exit_elf_binfmt);
2340 MODULE_LICENSE("GPL");
2341