/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/exec.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
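/*
 * Worked example of the macros above (illustrative, assuming
 * ELF_MIN_ALIGN == 4096):
 *
 *	ELF_PAGESTART(0x2345)  == 0x2000	round down to page start
 *	ELF_PAGEOFFSET(0x2345) == 0x0345	offset within the page
 *	ELF_PAGEALIGN(0x2345)  == 0x3000	round up to the next page
 */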

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		addr = vm_brk(start, end - start);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
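/*
 * For example (assuming 4K pages): if the file-backed data ends at
 * 0x601234, padzero() clears the 0xdcc bytes from 0x601234 up to
 * 0x602000, so the tail of the last file-backed page cannot leak stale
 * file contents into the bss.
 */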

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
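/*
 * A quick sketch of the (common) grows-down case: STACK_ALLOC moves sp
 * down by len and returns the new, lower address so the caller can copy
 * len bytes there, and STACK_ROUND aligns the final stack pointer down
 * to a 16-byte boundary as the ABI expects.
 */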

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

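/*
 * For reference, the initial userspace stack that create_elf_tables()
 * builds ends up laid out (from the final stack pointer upward, in the
 * usual grows-down case) roughly as:
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv id/value pairs, terminated by AT_NULL
 *	platform / random-byte / string data allocated above
 *
 * which matches what the __put_user()/copy_to_user() calls below write.
 */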
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image and then unmap the remainder
	 * at the end (the unmap is needed for ELF images with holes).
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}
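/*
 * Example of the address arithmetic above (illustrative values,
 * assuming 4K pages): for a phdr with p_vaddr == 0x400123 and
 * p_offset == 0x123, ELF_PAGEOFFSET(p_vaddr) == 0x123, so the request
 * becomes addr 0x400000 with file offset 0x0 -- both page aligned, as
 * mmap() requires -- while the segment contents still land at 0x400123.
 */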

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
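/*
 * e.g. two PT_LOADs at 0x400000 (p_memsz 0x1000) and 0x600000
 * (p_memsz 0x2000) give a total mapping size of
 * 0x602000 - 0x400000 = 0x202000 (illustrative values).
 */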


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out_close;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		error = vm_brk(elf_bss, last_bss - elf_bss);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}
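/*
 * On success the value returned above is the interpreter's load bias
 * (zero for an ET_EXEC interpreter); load_elf_binary() below records it
 * as interp_load_addr and adds the interpreter's e_entry to it.
 */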

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
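/*
 * With the default STACK_RND_MASK of 0x7ff on a 4K-page system, the
 * randomization above shifts the stack top by up to 0x7ff << 12, i.e.
 * just under 8MB -- which is where the "8MB of VA" note comes from.
 */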

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary.
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later. */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value.
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
			/* Memory randomization might have been switched off
			 * at runtime via sysctl.
			 * If that is the case, retain the original non-zero
			 * load_bias value in order to establish proper
			 * non-randomized mappings.
			 */
			if (current->flags & PF_RANDOMIZE)
				load_bias = 0;
			else
				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
		current->brk_randomized = 1;
#endif
	}
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len)
		vm_brk(len, bss - len);
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
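/*
 * Note that this path backs the legacy uselib(2) syscall: as checked
 * above, it only accepts ET_EXEC images with exactly one PT_LOAD
 * segment, which it maps at that segment's fixed address.
 */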

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, the vsyscall page, and
 * other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;
	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_NODUMP)
		return 0;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}
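/*
 * For example, a private file-backed mapping that has never been
 * written to (no anon_vma) is normally skipped entirely; it only makes
 * it into the dump if MAPPED_PRIVATE is set in the coredump filter, or
 * if its first page carries an ELF header and ELF_HEADERS is set.
 */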

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
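/*
 * For reference, an ELF note record in the file is the elf_note header
 * followed by the name and the descriptor, each padded to a 4-byte
 * boundary -- exactly the three terms notesize() adds up:
 *
 *	n_namesz | n_descsz | n_type | name (padded) | desc (padded)
 */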

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}
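/*
 * The PT_NOTE segment exists only in the core file (p_memsz == 0 and
 * p_vaddr == 0 above); debuggers locate the notes via p_offset and
 * p_filesz rather than through a memory mapping.
 */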

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
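/*
 * saved_auxv was filled in by create_elf_tables() above and is
 * terminated by an AT_NULL pair, so the loop counts id/value pairs up
 * to and including that terminator.
 */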

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote auxv;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, PR_REG_SIZE(t->prstatus.pr_reg),
				    PR_REG_PTR(&t->prstatus), NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each of the other regsets might generate a note too.  For each
	 * regset that has no core_note_type or is inactive, we leave
	 * t->notes[i] all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					SET_PR_FPVALID(&t->prstatus, 1);
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL)
		return 0;

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
}
1620 
1621 #else
1622 
1623 /* Here is the structure in which status of each thread is captured. */
1624 struct elf_thread_status
1625 {
1626 	struct list_head list;
1627 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1628 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1629 	struct task_struct *thread;
1630 #ifdef ELF_CORE_COPY_XFPREGS
1631 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1632 #endif
1633 	struct memelfnote notes[3];
1634 	int num_notes;
1635 };
1636 
1637 /*
1638  * In order to add the specific thread information for the elf file format,
1639  * we need to keep a linked list of every threads pr_status and then create
1640  * a single section for them in the final core file.
1641  */
1642 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1643 {
1644 	int sz = 0;
1645 	struct task_struct *p = t->thread;
1646 	t->num_notes = 0;
1647 
1648 	fill_prstatus(&t->prstatus, p, signr);
1649 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1650 
1651 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1652 		  &(t->prstatus));
1653 	t->num_notes++;
1654 	sz += notesize(&t->notes[0]);
1655 
1656 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657 								&t->fpu))) {
1658 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1659 			  &(t->fpu));
1660 		t->num_notes++;
1661 		sz += notesize(&t->notes[1]);
1662 	}
1663 
1664 #ifdef ELF_CORE_COPY_XFPREGS
1665 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1666 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1667 			  sizeof(t->xfpu), &t->xfpu);
1668 		t->num_notes++;
1669 		sz += notesize(&t->notes[2]);
1670 	}
1671 #endif
1672 	return sz;
1673 }
1674 
1675 struct elf_note_info {
1676 	struct memelfnote *notes;
1677 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1678 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1679 	struct list_head thread_list;
1680 	elf_fpregset_t *fpu;
1681 #ifdef ELF_CORE_COPY_XFPREGS
1682 	elf_fpxregset_t *xfpu;
1683 #endif
1684 	int thread_status_size;
1685 	int numnote;
1686 };
1687 
1688 static int elf_note_info_init(struct elf_note_info *info)
1689 {
1690 	memset(info, 0, sizeof(*info));
1691 	INIT_LIST_HEAD(&info->thread_list);
1692 
1693 	/* Allocate space for six ELF notes */
1694 	info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1695 	if (!info->notes)
1696 		return 0;
1697 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698 	if (!info->psinfo)
1699 		goto notes_free;
1700 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701 	if (!info->prstatus)
1702 		goto psinfo_free;
1703 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704 	if (!info->fpu)
1705 		goto prstatus_free;
1706 #ifdef ELF_CORE_COPY_XFPREGS
1707 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708 	if (!info->xfpu)
1709 		goto fpu_free;
1710 #endif
1711 	return 1;
1712 #ifdef ELF_CORE_COPY_XFPREGS
1713  fpu_free:
1714 	kfree(info->fpu);
1715 #endif
1716  prstatus_free:
1717 	kfree(info->prstatus);
1718  psinfo_free:
1719 	kfree(info->psinfo);
1720  notes_free:
1721 	kfree(info->notes);
1722 	return 0;
1723 }
1724 
1725 static int fill_note_info(struct elfhdr *elf, int phdrs,
1726 			  struct elf_note_info *info,
1727 			  long signr, struct pt_regs *regs)
1728 {
1729 	struct list_head *t;
1730 
1731 	if (!elf_note_info_init(info))
1732 		return 0;
1733 
1734 	if (signr) {
1735 		struct core_thread *ct;
1736 		struct elf_thread_status *ets;
1737 
1738 		for (ct = current->mm->core_state->dumper.next;
1739 						ct; ct = ct->next) {
1740 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1741 			if (!ets)
1742 				return 0;
1743 
1744 			ets->thread = ct->task;
1745 			list_add(&ets->list, &info->thread_list);
1746 		}
1747 
1748 		list_for_each(t, &info->thread_list) {
1749 			int sz;
1750 
1751 			ets = list_entry(t, struct elf_thread_status, list);
1752 			sz = elf_dump_thread_status(signr, ets);
1753 			info->thread_status_size += sz;
1754 		}
1755 	}
1756 	/* now collect the dump for the current */
1757 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1758 	fill_prstatus(info->prstatus, current, signr);
1759 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1760 
1761 	/* Set up header */
1762 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1763 
1764 	/*
1765 	 * Set up the notes in similar form to SVR4 core dumps made
1766 	 * with info from their /proc.
1767 	 */
1768 
1769 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1770 		  sizeof(*info->prstatus), info->prstatus);
1771 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1772 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1773 		  sizeof(*info->psinfo), info->psinfo);
1774 
1775 	info->numnote = 2;
1776 
1777 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1778 
1779 	/* Try to dump the FPU. */
1780 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1781 							       info->fpu);
1782 	if (info->prstatus->pr_fpvalid)
1783 		fill_note(info->notes + info->numnote++,
1784 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1785 #ifdef ELF_CORE_COPY_XFPREGS
1786 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1787 		fill_note(info->notes + info->numnote++,
1788 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1789 			  sizeof(*info->xfpu), info->xfpu);
1790 #endif
1791 
1792 	return 1;
1793 }
1794 
1795 static size_t get_note_info_size(struct elf_note_info *info)
1796 {
1797 	int sz = 0;
1798 	int i;
1799 
1800 	for (i = 0; i < info->numnote; i++)
1801 		sz += notesize(info->notes + i);
1802 
1803 	sz += info->thread_status_size;
1804 
1805 	return sz;
1806 }
1807 
1808 static int write_note_info(struct elf_note_info *info,
1809 			   struct file *file, loff_t *foffset)
1810 {
1811 	int i;
1812 	struct list_head *t;
1813 
1814 	for (i = 0; i < info->numnote; i++)
1815 		if (!writenote(info->notes + i, file, foffset))
1816 			return 0;
1817 
1818 	/* write out the thread status notes section */
1819 	list_for_each(t, &info->thread_list) {
1820 		struct elf_thread_status *tmp =
1821 				list_entry(t, struct elf_thread_status, list);
1822 
1823 		for (i = 0; i < tmp->num_notes; i++)
1824 			if (!writenote(&tmp->notes[i], file, foffset))
1825 				return 0;
1826 	}
1827 
1828 	return 1;
1829 }
1830 
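/*
 * Undo elf_note_info_init() and drop the per-thread status entries
 * added by fill_note_info().
 */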
1831 static void free_note_info(struct elf_note_info *info)
1832 {
1833 	while (!list_empty(&info->thread_list)) {
1834 		struct list_head *tmp = info->thread_list.next;
1835 		list_del(tmp);
1836 		kfree(list_entry(tmp, struct elf_thread_status, list));
1837 	}
1838 
1839 	kfree(info->prstatus);
1840 	kfree(info->psinfo);
1841 	kfree(info->notes);
1842 	kfree(info->fpu);
1843 #ifdef ELF_CORE_COPY_XFPREGS
1844 	kfree(info->xfpu);
1845 #endif
1846 }
1847 
1848 #endif
1849 
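/*
 * The gate vma (e.g. the vsyscall page on some architectures) is not on
 * the mm's vma list, so the two helpers below splice it in as the last
 * element of the walk.
 */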
1850 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1851 					struct vm_area_struct *gate_vma)
1852 {
1853 	struct vm_area_struct *ret = tsk->mm->mmap;
1854 
1855 	if (ret)
1856 		return ret;
1857 	return gate_vma;
1858 }
1859 /*
1860  * Helper function for iterating across a vma list.  It ensures that the caller
1861  * will visit `gate_vma' prior to terminating the search.
1862  */
1863 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1864 					struct vm_area_struct *gate_vma)
1865 {
1866 	struct vm_area_struct *ret;
1867 
1868 	ret = this_vma->vm_next;
1869 	if (ret)
1870 		return ret;
1871 	if (this_vma == gate_vma)
1872 		return NULL;
1873 	return gate_vma;
1874 }
1875 
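/*
 * Extended numbering: when the real program header count does not fit
 * in e_phnum, it is stored in the sh_info field of a lone SHT_NULL
 * section header, and e_shnum/sh_size are set to 1 accordingly.
 */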
1876 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1877 			     elf_addr_t e_shoff, int segs)
1878 {
1879 	elf->e_shoff = e_shoff;
1880 	elf->e_shentsize = sizeof(*shdr4extnum);
1881 	elf->e_shnum = 1;
1882 	elf->e_shstrndx = SHN_UNDEF;
1883 
1884 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1885 
1886 	shdr4extnum->sh_type = SHT_NULL;
1887 	shdr4extnum->sh_size = elf->e_shnum;
1888 	shdr4extnum->sh_link = elf->e_shstrndx;
1889 	shdr4extnum->sh_info = segs;
1890 }
1891 
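/*
 * Number of bytes of vma contents that will land in the core file, as
 * filtered by vma_dump_size() under the given coredump mm_flags.
 */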
1892 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1893 				     unsigned long mm_flags)
1894 {
1895 	struct vm_area_struct *vma;
1896 	size_t size = 0;
1897 
1898 	for (vma = first_vma(current, gate_vma); vma != NULL;
1899 	     vma = next_vma(vma, gate_vma))
1900 		size += vma_dump_size(vma, mm_flags);
1901 	return size;
1902 }
1903 
1904 /*
1905  * Actual dumper
1906  *
1907  * This is a two-pass process; first we find the offsets of all the
1908  * bits, and then they are actually written out.  If we hit the core
1909  * file size limit we just truncate.
1910  */
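/*
 * A sketch of the resulting core file layout (offsets are computed in
 * pass one, data is emitted in pass two):
 *
 *	ELF header
 *	program headers (one PT_NOTE, one PT_LOAD per vma, plus extras)
 *	note data
 *	<padding up to ELF_EXEC_PAGESIZE>
 *	vma contents, one page at a time
 *	extra data and, if needed, the extended-numbering section header
 */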
1911 static int elf_core_dump(struct coredump_params *cprm)
1912 {
1913 	int has_dumped = 0;
1914 	mm_segment_t fs;
1915 	int segs;
1916 	size_t size = 0;
1917 	struct vm_area_struct *vma, *gate_vma;
1918 	struct elfhdr *elf = NULL;
1919 	loff_t offset = 0, dataoff, foffset;
1920 	struct elf_note_info info;
1921 	struct elf_phdr *phdr4note = NULL;
1922 	struct elf_shdr *shdr4extnum = NULL;
1923 	Elf_Half e_phnum;
1924 	elf_addr_t e_shoff;
1925 
1926 	/*
1927 	 * We no longer stop all VM operations.
1928 	 *
1929 	 * This is because those processes that could possibly change map_count
1930 	 * or the mmap / vma pages are now blocked in do_exit on current
1931 	 * finishing this core dump.
1932 	 *
1933 	 * Only ptrace can touch these memory addresses, but it doesn't change
1934 	 * the map_count or the pages allocated. So no possibility of crashing
1935 	 * exists while dumping the mm->vm_next areas to the core file.
1936 	 */
1937 
1938 	/* alloc memory for large data structures: too large to be on stack */
1939 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1940 	if (!elf)
1941 		goto out;
1942 	/*
1943 	 * The number of segs is recorded in the ELF header as a 16-bit value.
1944 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying here.
1945 	 */
1946 	segs = current->mm->map_count;
1947 	segs += elf_core_extra_phdrs();
1948 
1949 	gate_vma = get_gate_vma(current->mm);
1950 	if (gate_vma != NULL)
1951 		segs++;
1952 
1953 	/* for notes section */
1954 	segs++;
1955 
1956 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1957 	 * this, the kernel supports extended numbering. Have a look at
1958 	 * include/linux/elf.h for further information. */
1959 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1960 
1961 	/*
1962 	 * Collect all the non-memory information about the process for the
1963 	 * notes.  This also sets up the file header.
1964 	 */
1965 	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1966 		goto cleanup;
1967 
1968 	has_dumped = 1;
1969 	current->flags |= PF_DUMPCORE;
1970 
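	/*
	 * The ELF header, program headers and notes live in kernel
	 * buffers; widen the address limit so the file write path
	 * accepts kernel-space pointers.
	 */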
1971 	fs = get_fs();
1972 	set_fs(KERNEL_DS);
1973 
1974 	offset += sizeof(*elf);				/* Elf header */
1975 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
1976 	foffset = offset;
1977 
1978 	/* Write notes phdr entry */
1979 	{
1980 		size_t sz = get_note_info_size(&info);
1981 
1982 		sz += elf_coredump_extra_notes_size();
1983 
1984 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1985 		if (!phdr4note)
1986 			goto end_coredump;
1987 
1988 		fill_elf_note_phdr(phdr4note, sz, offset);
1989 		offset += sz;
1990 	}
1991 
1992 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1993 
1994 	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1995 	offset += elf_core_extra_data_size();
1996 	e_shoff = offset;
1997 
1998 	if (e_phnum == PN_XNUM) {
1999 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2000 		if (!shdr4extnum)
2001 			goto end_coredump;
2002 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2003 	}
2004 
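	/*
	 * Pass two: from here on, size counts the bytes actually written
	 * (checked against cprm->limit) while offset tracks where each
	 * vma's contents will land in the file.
	 */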
2005 	offset = dataoff;
2006 
2007 	size += sizeof(*elf);
2008 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2009 		goto end_coredump;
2010 
2011 	size += sizeof(*phdr4note);
2012 	if (size > cprm->limit
2013 	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2014 		goto end_coredump;
2015 
2016 	/* Write program headers for segments dump */
2017 	for (vma = first_vma(current, gate_vma); vma != NULL;
2018 			vma = next_vma(vma, gate_vma)) {
2019 		struct elf_phdr phdr;
2020 
2021 		phdr.p_type = PT_LOAD;
2022 		phdr.p_offset = offset;
2023 		phdr.p_vaddr = vma->vm_start;
2024 		phdr.p_paddr = 0;
2025 		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2026 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2027 		offset += phdr.p_filesz;
2028 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2029 		if (vma->vm_flags & VM_WRITE)
2030 			phdr.p_flags |= PF_W;
2031 		if (vma->vm_flags & VM_EXEC)
2032 			phdr.p_flags |= PF_X;
2033 		phdr.p_align = ELF_EXEC_PAGESIZE;
2034 
2035 		size += sizeof(phdr);
2036 		if (size > cprm->limit
2037 		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2038 			goto end_coredump;
2039 	}
2040 
2041 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2042 		goto end_coredump;
2043 
2044 	/* write out the notes section */
2045 	if (!write_note_info(&info, cprm->file, &foffset))
2046 		goto end_coredump;
2047 
2048 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2049 		goto end_coredump;
2050 
2051 	/* Align to page */
2052 	if (!dump_seek(cprm->file, dataoff - foffset))
2053 		goto end_coredump;
2054 
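	/*
	 * Dump the selected vma contents page by page.  A page that cannot
	 * be brought in (get_dump_page() returns NULL) becomes a hole in
	 * the file via dump_seek() instead of aborting the dump.
	 */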
2055 	for (vma = first_vma(current, gate_vma); vma != NULL;
2056 			vma = next_vma(vma, gate_vma)) {
2057 		unsigned long addr;
2058 		unsigned long end;
2059 
2060 		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2061 
2062 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2063 			struct page *page;
2064 			int stop;
2065 
2066 			page = get_dump_page(addr);
2067 			if (page) {
2068 				void *kaddr = kmap(page);
2069 				stop = ((size += PAGE_SIZE) > cprm->limit) ||
2070 					!dump_write(cprm->file, kaddr,
2071 						    PAGE_SIZE);
2072 				kunmap(page);
2073 				page_cache_release(page);
2074 			} else
2075 				stop = !dump_seek(cprm->file, PAGE_SIZE);
2076 			if (stop)
2077 				goto end_coredump;
2078 		}
2079 	}
2080 
2081 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2082 		goto end_coredump;
2083 
2084 	if (e_phnum == PN_XNUM) {
2085 		size += sizeof(*shdr4extnum);
2086 		if (size > cprm->limit
2087 		    || !dump_write(cprm->file, shdr4extnum,
2088 				   sizeof(*shdr4extnum)))
2089 			goto end_coredump;
2090 	}
2091 
2092 end_coredump:
2093 	set_fs(fs);
2094 
2095 cleanup:
2096 	free_note_info(&info);
2097 	kfree(shdr4extnum);
2098 	kfree(phdr4note);
2099 	kfree(elf);
2100 out:
2101 	return has_dumped;
2102 }
2103 
2104 #endif		/* CONFIG_ELF_CORE */
2105 
2106 static int __init init_elf_binfmt(void)
2107 {
2108 	register_binfmt(&elf_format);
2109 	return 0;
2110 }
2111 
2112 static void __exit exit_elf_binfmt(void)
2113 {
2114 	/* Remove the ELF loader. */
2115 	unregister_binfmt(&elf_format);
2116 }
2117 
2118 core_initcall(init_elf_binfmt);
2119 module_exit(exit_elf_binfmt);
2120 MODULE_LICENSE("GPL");
2121