/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
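/*
 * Illustrative example (editor's sketch, assuming ELF_MIN_ALIGN == 4096,
 * i.e. 0x1000):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000	(round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345	(offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000	(round up to the next page)
 */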

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
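/*
 * Note (clarifying comment): vm_mmap() and vm_brk() return either a
 * mapped address or a negative errno cast to unsigned long, so the
 * single ">= TASK_SIZE" test rejects both out-of-range addresses and
 * error cookies such as (unsigned long)-ENOMEM.
 */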

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		addr = vm_brk(start, end - start);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These pages would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
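/*
 * Worked example (editor's sketch, assuming a downward-growing stack
 * and sizeof(elf_addr_t) == 8).  Starting from sp == 0x7fff0000:
 *
 *   STACK_ALLOC(sp, 16) moves sp down to 0x7ffefff0 and returns it;
 *   STACK_ADD(sp, 2)    yields 0x7ffefff0 - 2*8 == 0x7ffeffe0;
 *   STACK_ROUND(sp, 2)  yields the same address, rounded down to a
 *                       16-byte boundary.
 */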

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
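/*
 * Illustrative note: each NEW_AUX_ENT() appends an (id, value) pair to
 * saved_auxv, so NEW_AUX_ENT(AT_PAGESZ, 4096) followed by the
 * terminating AT_NULL (written by the memset below) leaves the vector
 * reading { AT_PAGESZ, 4096, AT_NULL, 0, ... }.
 */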

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
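
/*
 * Resulting initial stack layout, low to high addresses (illustrative
 * sketch; exact padding is architecture-dependent):
 *
 *   argc
 *   argv[0] ... argv[argc - 1], NULL
 *   envp[0] ... envp[envc - 1], NULL
 *   auxv (id, value) pairs, terminated by AT_NULL
 *   ... argument/environment strings, platform strings, random bytes
 */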

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
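
/*
 * Worked example (illustrative, editor-supplied values): with PT_LOAD
 * segments at p_vaddr 0x400000 (p_memsz 0x1000) and 0x600e10 (p_memsz
 * 0x4f0), the span is
 * 0x600e10 + 0x4f0 - ELF_PAGESTART(0x400000) == 0x201300 bytes.
 */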


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */
382 
383 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
384 		struct file *interpreter, unsigned long *interp_map_addr,
385 		unsigned long no_base)
386 {
387 	struct elf_phdr *elf_phdata;
388 	struct elf_phdr *eppnt;
389 	unsigned long load_addr = 0;
390 	int load_addr_set = 0;
391 	unsigned long last_bss = 0, elf_bss = 0;
392 	unsigned long error = ~0UL;
393 	unsigned long total_size;
394 	int retval, i, size;
395 
396 	/* First of all, some simple consistency checks */
397 	if (interp_elf_ex->e_type != ET_EXEC &&
398 	    interp_elf_ex->e_type != ET_DYN)
399 		goto out;
400 	if (!elf_check_arch(interp_elf_ex))
401 		goto out;
402 	if (!interpreter->f_op || !interpreter->f_op->mmap)
403 		goto out;
404 
405 	/*
406 	 * If the size of this structure has changed, then punt, since
407 	 * we will be doing the wrong thing.
408 	 */
409 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
410 		goto out;
411 	if (interp_elf_ex->e_phnum < 1 ||
412 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
413 		goto out;
414 
415 	/* Now read in all of the header information */
416 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
417 	if (size > ELF_MIN_ALIGN)
418 		goto out;
419 	elf_phdata = kmalloc(size, GFP_KERNEL);
420 	if (!elf_phdata)
421 		goto out;
422 
423 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
424 			     (char *)elf_phdata, size);
425 	error = -EIO;
426 	if (retval != size) {
427 		if (retval < 0)
428 			error = retval;
429 		goto out_close;
430 	}
431 
432 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
433 	if (!total_size) {
434 		error = -EINVAL;
435 		goto out_close;
436 	}
437 
438 	eppnt = elf_phdata;
439 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
440 		if (eppnt->p_type == PT_LOAD) {
441 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
442 			int elf_prot = 0;
443 			unsigned long vaddr = 0;
444 			unsigned long k, map_addr;
445 
446 			if (eppnt->p_flags & PF_R)
447 		    		elf_prot = PROT_READ;
448 			if (eppnt->p_flags & PF_W)
449 				elf_prot |= PROT_WRITE;
450 			if (eppnt->p_flags & PF_X)
451 				elf_prot |= PROT_EXEC;
452 			vaddr = eppnt->p_vaddr;
453 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
454 				elf_type |= MAP_FIXED;
455 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
456 				load_addr = -vaddr;
457 
458 			map_addr = elf_map(interpreter, load_addr + vaddr,
459 					eppnt, elf_prot, elf_type, total_size);
460 			total_size = 0;
461 			if (!*interp_map_addr)
462 				*interp_map_addr = map_addr;
463 			error = map_addr;
464 			if (BAD_ADDR(map_addr))
465 				goto out_close;
466 
467 			if (!load_addr_set &&
468 			    interp_elf_ex->e_type == ET_DYN) {
469 				load_addr = map_addr - ELF_PAGESTART(vaddr);
470 				load_addr_set = 1;
471 			}
472 
473 			/*
474 			 * Check to see if the section's size will overflow the
475 			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out_close;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		error = vm_brk(elf_bss, last_bss - elf_bss);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
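
/*
 * Example (illustrative): with the default STACK_RND_MASK of 0x7ff and
 * PAGE_SHIFT == 12, the stack top moves by up to 0x7ff << 12 bytes,
 * i.e. just under 8MB of randomization.
 */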

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct pt_regs *regs = current_pt_regs();
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
			/* Memory randomization might have been switched off
			 * at runtime via sysctl.
			 * If that is the case, retain the original non-zero
			 * load_bias value in order to establish proper
			 * non-randomized mappings.
			 */
			if (current->flags & PF_RANDOMIZE)
				load_bias = 0;
			else
				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long interp_map_addr = 0;

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
		current->brk_randomized = 1;
#endif
	}
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len)
		vm_brk(len, bss - len);
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;
	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
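/*
 * Note (clarifying comment): the MMF_DUMP_* bits tested by FILTER()
 * are the ones user space toggles through /proc/<pid>/coredump_filter.
 */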

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
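
/*
 * Example (illustrative): a note named "CORE" with a 4-byte descriptor
 * occupies sizeof(struct elf_note) + roundup(5, 4) + roundup(4, 4)
 * == 12 + 8 + 4 == 24 bytes in the core file.
 */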

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_t utime, stime;

		task_cputime(p, &utime, &stime);
		cputime_to_timeval(utime, &prstatus->pr_utime);
		cputime_to_timeval(stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		siginfo_t *siginfo)
{
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
	set_fs(old_fs);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
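/*
 * Example (illustrative) with count == 1 and page_size == 4096:
 *
 *   { 1, 4096, 0x400000, 0x401000, 0 } "/bin/true" NUL
 *
 * where the triple gives the mapping's start, end, and file offset in
 * page_size units.
 */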
static void fill_files_note(struct memelfnote *note)
{
	struct vm_area_struct *vma;
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;

	/* *Estimated* file count and total data size needed */
	count = current->mm->map_count;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		goto err;
	size = round_up(size, PAGE_SIZE);
	data = vmalloc(size);
	if (!data)
		goto err;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct file *file;
		const char *filename;

		file = vma->vm_file;
		if (!file)
			continue;
		filename = d_path(&file->f_path, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				vfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* d_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = vma->vm_start;
		*start_end_ofs++ = vma->vm_end;
		*start_end_ofs++ = vma->vm_pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than current->mm->map_count,
	 * we need to move filenames down.
	 */
	n = current->mm->map_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
 err: ;
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, PR_REG_SIZE(t->prstatus.pr_reg),
				    PR_REG_PTR(&t->prstatus), NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					SET_PR_FPVALID(&t->prstatus, 1);
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  siginfo_t *siginfo, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL) {
		info->psinfo.data = NULL; /* So we don't free this wrongly */
		return 0;
	}

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	fill_files_note(&info->files);
	info->size += notesize(&info->files);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->signote, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;
		if (first && !writenote(&info->files, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	vfree(info->files.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
1765  * a single section for them in the final core file.
1766  */
1767 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1768 {
1769 	int sz = 0;
1770 	struct task_struct *p = t->thread;
1771 	t->num_notes = 0;
1772 
1773 	fill_prstatus(&t->prstatus, p, signr);
1774 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1775 
1776 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1777 		  &(t->prstatus));
1778 	t->num_notes++;
1779 	sz += notesize(&t->notes[0]);
1780 
1781 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1782 								&t->fpu))) {
1783 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1784 			  &(t->fpu));
1785 		t->num_notes++;
1786 		sz += notesize(&t->notes[1]);
1787 	}
1788 
1789 #ifdef ELF_CORE_COPY_XFPREGS
1790 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1791 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1792 			  sizeof(t->xfpu), &t->xfpu);
1793 		t->num_notes++;
1794 		sz += notesize(&t->notes[2]);
1795 	}
1796 #endif
1797 	return sz;
1798 }
1799 
1800 struct elf_note_info {
1801 	struct memelfnote *notes;
1802 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1803 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1804 	struct list_head thread_list;
1805 	elf_fpregset_t *fpu;
1806 #ifdef ELF_CORE_COPY_XFPREGS
1807 	elf_fpxregset_t *xfpu;
1808 #endif
1809 	user_siginfo_t csigdata;
1810 	int thread_status_size;
1811 	int numnote;
1812 };
1813 
1814 static int elf_note_info_init(struct elf_note_info *info)
1815 {
1816 	memset(info, 0, sizeof(*info));
1817 	INIT_LIST_HEAD(&info->thread_list);
1818 
1819 	/* Allocate space for ELF notes */
1820 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1821 	if (!info->notes)
1822 		return 0;
1823 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1824 	if (!info->psinfo)
1825 		return 0;
1826 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1827 	if (!info->prstatus)
1828 		return 0;
1829 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1830 	if (!info->fpu)
1831 		return 0;
1832 #ifdef ELF_CORE_COPY_XFPREGS
1833 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1834 	if (!info->xfpu)
1835 		return 0;
1836 #endif
1837 	return 1;
1838 }
1839 
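/*
 * Collect all the non-memory information for the dump: status of every
 * other thread using the mm, then the current task's prstatus, psinfo,
 * siginfo, auxv, file map and FPU notes.  Also fills in the ELF header.
 * Returns 0 on failure.
 */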
1840 static int fill_note_info(struct elfhdr *elf, int phdrs,
1841 			  struct elf_note_info *info,
1842 			  siginfo_t *siginfo, struct pt_regs *regs)
1843 {
1844 	struct list_head *t;
1845 
1846 	if (!elf_note_info_init(info))
1847 		return 0;
1848 
1849 	if (siginfo->si_signo) {
1850 		struct core_thread *ct;
1851 		struct elf_thread_status *ets;
1852 
1853 		for (ct = current->mm->core_state->dumper.next;
1854 						ct; ct = ct->next) {
1855 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1856 			if (!ets)
1857 				return 0;
1858 
1859 			ets->thread = ct->task;
1860 			list_add(&ets->list, &info->thread_list);
1861 		}
1862 
1863 		list_for_each(t, &info->thread_list) {
1864 			int sz;
1865 
1866 			ets = list_entry(t, struct elf_thread_status, list);
1867 			sz = elf_dump_thread_status(siginfo->si_signo, ets);
1868 			info->thread_status_size += sz;
1869 		}
1870 	}
1871 	/* now collect the dump for the current task */
1872 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1873 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1874 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1875 
1876 	/* Set up header */
1877 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1878 
1879 	/*
1880 	 * Set up the notes in similar form to SVR4 core dumps made
1881 	 * with info from their /proc.
1882 	 */
1883 
1884 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1885 		  sizeof(*info->prstatus), info->prstatus);
1886 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1887 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1888 		  sizeof(*info->psinfo), info->psinfo);
1889 
1890 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1891 	fill_auxv_note(info->notes + 3, current->mm);
1892 	fill_files_note(info->notes + 4);
1893 
1894 	info->numnote = 5;
1895 
1896 	/* Try to dump the FPU. */
1897 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1898 							       info->fpu);
1899 	if (info->prstatus->pr_fpvalid)
1900 		fill_note(info->notes + info->numnote++,
1901 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1902 #ifdef ELF_CORE_COPY_XFPREGS
1903 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1904 		fill_note(info->notes + info->numnote++,
1905 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1906 			  sizeof(*info->xfpu), info->xfpu);
1907 #endif
1908 
1909 	return 1;
1910 }
1911 
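/*
 * Size, in bytes, of the note segment: the fixed notes plus the
 * per-thread status notes sized earlier by elf_dump_thread_status().
 */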
1912 static size_t get_note_info_size(struct elf_note_info *info)
1913 {
1914 	int sz = 0;
1915 	int i;
1916 
1917 	for (i = 0; i < info->numnote; i++)
1918 		sz += notesize(info->notes + i);
1919 
1920 	sz += info->thread_status_size;
1921 
1922 	return sz;
1923 }
1924 
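/*
 * Write the note segment: the process-wide notes first, then each
 * thread's status notes.  Returns 0 if a write fails.
 */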
1925 static int write_note_info(struct elf_note_info *info,
1926 			   struct file *file, loff_t *foffset)
1927 {
1928 	int i;
1929 	struct list_head *t;
1930 
1931 	for (i = 0; i < info->numnote; i++)
1932 		if (!writenote(info->notes + i, file, foffset))
1933 			return 0;
1934 
1935 	/* write out the thread status notes section */
1936 	list_for_each(t, &info->thread_list) {
1937 		struct elf_thread_status *tmp =
1938 				list_entry(t, struct elf_thread_status, list);
1939 
1940 		for (i = 0; i < tmp->num_notes; i++)
1941 			if (!writenote(&tmp->notes[i], file, foffset))
1942 				return 0;
1943 	}
1944 
1945 	return 1;
1946 }
1947 
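/*
 * Release everything fill_note_info() allocated, including the
 * per-thread status list.
 */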
1948 static void free_note_info(struct elf_note_info *info)
1949 {
1950 	while (!list_empty(&info->thread_list)) {
1951 		struct list_head *tmp = info->thread_list.next;
1952 		list_del(tmp);
1953 		kfree(list_entry(tmp, struct elf_thread_status, list));
1954 	}
1955 
1956 	/* Free data allocated by fill_files_note(): */
1957 	if (info->notes)
		vfree(info->notes[4].data);
1958 
1959 	kfree(info->prstatus);
1960 	kfree(info->psinfo);
1961 	kfree(info->notes);
1962 	kfree(info->fpu);
1963 #ifdef ELF_CORE_COPY_XFPREGS
1964 	kfree(info->xfpu);
1965 #endif
1966 }
1967 
1968 #endif
1969 
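/*
 * First vma to dump: the start of the mm's vma list, or the gate vma
 * when the address space is otherwise empty.
 */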
1970 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1971 					struct vm_area_struct *gate_vma)
1972 {
1973 	struct vm_area_struct *ret = tsk->mm->mmap;
1974 
1975 	if (ret)
1976 		return ret;
1977 	return gate_vma;
1978 }
1979 /*
1980  * Helper function for iterating across a vma list.  It ensures that the caller
1981  * will visit `gate_vma' prior to terminating the search.
1982  */
1983 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1984 					struct vm_area_struct *gate_vma)
1985 {
1986 	struct vm_area_struct *ret;
1987 
1988 	ret = this_vma->vm_next;
1989 	if (ret)
1990 		return ret;
1991 	if (this_vma == gate_vma)
1992 		return NULL;
1993 	return gate_vma;
1994 }
1995 
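/*
 * With extended numbering the real segment count no longer fits in
 * e_phnum; it is stored in the sh_info field of the single (index 0)
 * section header written below.  A core-file reader would recover it
 * roughly like this (userspace sketch, not kernel code):
 *
 *	phnum = ehdr->e_phnum;
 *	if (phnum == PN_XNUM)
 *		phnum = shdr[0].sh_info;
 */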
1996 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1997 			     elf_addr_t e_shoff, int segs)
1998 {
1999 	elf->e_shoff = e_shoff;
2000 	elf->e_shentsize = sizeof(*shdr4extnum);
2001 	elf->e_shnum = 1;
2002 	elf->e_shstrndx = SHN_UNDEF;
2003 
2004 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2005 
2006 	shdr4extnum->sh_type = SHT_NULL;
2007 	shdr4extnum->sh_size = elf->e_shnum;
2008 	shdr4extnum->sh_link = elf->e_shstrndx;
2009 	shdr4extnum->sh_info = segs;
2010 }
2011 
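/*
 * Total number of bytes of memory contents that will land in the dump,
 * using the same per-vma policy (vma_dump_size) as the writeout loop.
 */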
2012 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2013 				     unsigned long mm_flags)
2014 {
2015 	struct vm_area_struct *vma;
2016 	size_t size = 0;
2017 
2018 	for (vma = first_vma(current, gate_vma); vma != NULL;
2019 	     vma = next_vma(vma, gate_vma))
2020 		size += vma_dump_size(vma, mm_flags);
2021 	return size;
2022 }
2023 
2024 /*
2025  * Actual dumper
2026  *
2027  * This is a two-pass process; first we find the offsets of the bits,
2028  * and then they are actually written out.  If we run out of core limit
2029  * we just truncate.
2030  */
2031 static int elf_core_dump(struct coredump_params *cprm)
2032 {
2033 	int has_dumped = 0;
2034 	mm_segment_t fs;
2035 	int segs;
2036 	size_t size = 0;
2037 	struct vm_area_struct *vma, *gate_vma;
2038 	struct elfhdr *elf = NULL;
2039 	loff_t offset = 0, dataoff, foffset;
2040 	struct elf_note_info info;
2041 	struct elf_phdr *phdr4note = NULL;
2042 	struct elf_shdr *shdr4extnum = NULL;
2043 	Elf_Half e_phnum;
2044 	elf_addr_t e_shoff;
2045 
2046 	/*
2047 	 * We no longer stop all VM operations.
2048 	 *
2049 	 * This is because those processes that could possibly change map_count
2050 	 * or the mmap / vma pages are now blocked in do_exit, waiting for the
2051 	 * current task to finish this core dump.
2052 	 *
2053 	 * Only ptrace can touch these memory addresses, but it doesn't change
2054 	 * the map_count or the pages allocated. So no possibility of crashing
2055 	 * exists while dumping the mm->vm_next areas to the core file.
2056 	 */
2057 
2058 	/* alloc memory for large data structures: too large to be on stack */
2059 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2060 	if (!elf)
2061 		goto out;
2062 	/*
2063 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2064 	 * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2065 	 */
2066 	segs = current->mm->map_count;
2067 	segs += elf_core_extra_phdrs();
2068 
2069 	gate_vma = get_gate_vma(current->mm);
2070 	if (gate_vma != NULL)
2071 		segs++;
2072 
2073 	/* for notes section */
2074 	segs++;
2075 
2076 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2077 	 * this, the kernel supports extended numbering. Have a look at
2078 	 * include/linux/elf.h for further information. */
2079 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2080 
2081 	/*
2082 	 * Collect all the non-memory information about the process for the
2083 	 * notes.  This also sets up the file header.
2084 	 */
2085 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2086 		goto cleanup;
2087 
2088 	has_dumped = 1;
2089 	current->flags |= PF_DUMPCORE;
2090 
2091 	fs = get_fs();
2092 	set_fs(KERNEL_DS);
2093 
2094 	offset += sizeof(*elf);				/* Elf header */
2095 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2096 	foffset = offset;
2097 
2098 	/* Write notes phdr entry */
2099 	{
2100 		size_t sz = get_note_info_size(&info);
2101 
2102 		sz += elf_coredump_extra_notes_size();
2103 
2104 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2105 		if (!phdr4note)
2106 			goto end_coredump;
2107 
2108 		fill_elf_note_phdr(phdr4note, sz, offset);
2109 		offset += sz;
2110 	}
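	/*
	 * Layout so far, in file order: ELF header, program headers,
	 * note segment.  Memory contents follow at the next page
	 * boundary, then any arch-specific extra data, and finally the
	 * extended-numbering section header if e_phnum overflowed.
	 */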
2111 
2112 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2113 
2114 	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2115 	offset += elf_core_extra_data_size();
2116 	e_shoff = offset;
2117 
2118 	if (e_phnum == PN_XNUM) {
2119 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2120 		if (!shdr4extnum)
2121 			goto end_coredump;
2122 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2123 	}
2124 
2125 	offset = dataoff;
2126 
2127 	size += sizeof(*elf);
2128 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2129 		goto end_coredump;
2130 
2131 	size += sizeof(*phdr4note);
2132 	if (size > cprm->limit
2133 	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2134 		goto end_coredump;
2135 
2136 	/* Write program headers for segments dump */
2137 	for (vma = first_vma(current, gate_vma); vma != NULL;
2138 			vma = next_vma(vma, gate_vma)) {
2139 		struct elf_phdr phdr;
2140 
2141 		phdr.p_type = PT_LOAD;
2142 		phdr.p_offset = offset;
2143 		phdr.p_vaddr = vma->vm_start;
2144 		phdr.p_paddr = 0;
2145 		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2146 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2147 		offset += phdr.p_filesz;
2148 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2149 		if (vma->vm_flags & VM_WRITE)
2150 			phdr.p_flags |= PF_W;
2151 		if (vma->vm_flags & VM_EXEC)
2152 			phdr.p_flags |= PF_X;
2153 		phdr.p_align = ELF_EXEC_PAGESIZE;
2154 
2155 		size += sizeof(phdr);
2156 		if (size > cprm->limit
2157 		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2158 			goto end_coredump;
2159 	}
2160 
2161 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2162 		goto end_coredump;
2163 
2164 	/* write out the notes section */
2165 	if (!write_note_info(&info, cprm->file, &foffset))
2166 		goto end_coredump;
2167 
2168 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2169 		goto end_coredump;
2170 
2171 	/* Align to page */
2172 	if (!dump_seek(cprm->file, dataoff - foffset))
2173 		goto end_coredump;
2174 
2175 	for (vma = first_vma(current, gate_vma); vma != NULL;
2176 			vma = next_vma(vma, gate_vma)) {
2177 		unsigned long addr;
2178 		unsigned long end;
2179 
2180 		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2181 
2182 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
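		/*
		 * Dump the vma one page at a time: pages that cannot be
		 * fetched (or are deliberately excluded) become holes in
		 * the file via dump_seek() rather than aborting the dump.
		 */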
2183 			struct page *page;
2184 			int stop;
2185 
2186 			page = get_dump_page(addr);
2187 			if (page) {
2188 				void *kaddr = kmap(page);
2189 				stop = ((size += PAGE_SIZE) > cprm->limit) ||
2190 					!dump_write(cprm->file, kaddr,
2191 						    PAGE_SIZE);
2192 				kunmap(page);
2193 				page_cache_release(page);
2194 			} else
2195 				stop = !dump_seek(cprm->file, PAGE_SIZE);
2196 			if (stop)
2197 				goto end_coredump;
2198 		}
2199 	}
2200 
2201 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2202 		goto end_coredump;
2203 
2204 	if (e_phnum == PN_XNUM) {
2205 		size += sizeof(*shdr4extnum);
2206 		if (size > cprm->limit
2207 		    || !dump_write(cprm->file, shdr4extnum,
2208 				   sizeof(*shdr4extnum)))
2209 			goto end_coredump;
2210 	}
2211 
2212 end_coredump:
2213 	set_fs(fs);
2214 
2215 cleanup:
2216 	free_note_info(&info);
2217 	kfree(shdr4extnum);
2218 	kfree(phdr4note);
2219 	kfree(elf);
2220 out:
2221 	return has_dumped;
2222 }
2223 
2224 #endif		/* CONFIG_ELF_CORE */
2225 
2226 static int __init init_elf_binfmt(void)
2227 {
2228 	register_binfmt(&elf_format);
2229 	return 0;
2230 }
2231 
2232 static void __exit exit_elf_binfmt(void)
2233 {
2234 	/* Remove the ELF loader. */
2235 	unregister_binfmt(&elf_format);
2236 }
2237 
2238 core_initcall(init_elf_binfmt);
2239 module_exit(exit_elf_binfmt);
2240 MODULE_LICENSE("GPL");
2241