1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 				int, int, unsigned long);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
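
/*
 * Worked example (assuming ELF_MIN_ALIGN is 4096, i.e. 0x1000):
 *
 *	ELF_PAGESTART(0x12345)  == 0x12000	round down to page start
 *	ELF_PAGEOFFSET(0x12345) == 0x345	offset within the page
 *	ELF_PAGEALIGN(0x12345)  == 0x13000	round up to the next page
 */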
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
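
/*
 * BAD_ADDR() also catches error returns: helpers such as do_brk() and
 * do_mmap() hand back -errno cast to an unsigned long, and any such
 * value is far above TASK_SIZE.
 */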
84 
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  Otherwise they
103    would contain junk from the file that should not
104    be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
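
/*
 * Example (assuming a 4096-byte ELF_MIN_ALIGN): if elf_bss ends at
 * 0x0804a123, ELF_PAGEOFFSET() yields 0x123 and the remaining
 * 4096 - 0x123 bytes up to 0x0804b000 are cleared.
 */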
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
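
/*
 * On the usual grows-down stack, STACK_ALLOC(sp, len) moves sp down by
 * len bytes and evaluates to the new, lower address; STACK_ADD() makes
 * room for a number of elf_addr_t slots; and STACK_ROUND() keeps the
 * resulting stack pointer 16-byte aligned, as most ABIs expect at
 * process entry.
 */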
133 
134 #ifndef ELF_BASE_PLATFORM
135 /*
136  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138  * will be copied to the user stack in the same manner as AT_PLATFORM.
139  */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142 
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145 		unsigned long load_addr, unsigned long interp_load_addr)
146 {
147 	unsigned long p = bprm->p;
148 	int argc = bprm->argc;
149 	int envc = bprm->envc;
150 	elf_addr_t __user *argv;
151 	elf_addr_t __user *envp;
152 	elf_addr_t __user *sp;
153 	elf_addr_t __user *u_platform;
154 	elf_addr_t __user *u_base_platform;
155 	const char *k_platform = ELF_PLATFORM;
156 	const char *k_base_platform = ELF_BASE_PLATFORM;
157 	int items;
158 	elf_addr_t *elf_info;
159 	int ei_index = 0;
160 	struct task_struct *tsk = current;
161 	struct vm_area_struct *vma;
162 
163 	/*
164 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 	 * evictions by the processes running on the same package. One
166 	 * thing we can do is to shuffle the initial stack for them.
167 	 */
168 
169 	p = arch_align_stack(p);
170 
171 	/*
172 	 * If this architecture has a platform capability string, copy it
173 	 * to userspace.  In some cases (Sparc), this info is impossible
174 	 * for userspace to get any other way, in others (i386) it is
175 	 * merely difficult.
176 	 */
177 	u_platform = NULL;
178 	if (k_platform) {
179 		size_t len = strlen(k_platform) + 1;
180 
181 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
182 		if (__copy_to_user(u_platform, k_platform, len))
183 			return -EFAULT;
184 	}
185 
186 	/*
187 	 * If this architecture has a "base" platform capability
188 	 * string, copy it to userspace.
189 	 */
190 	u_base_platform = NULL;
191 	if (k_base_platform) {
192 		size_t len = strlen(k_base_platform) + 1;
193 
194 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 		if (__copy_to_user(u_base_platform, k_base_platform, len))
196 			return -EFAULT;
197 	}
198 
199 	/* Create the ELF interpreter info */
200 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
201 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
202 #define NEW_AUX_ENT(id, val) \
203 	do { \
204 		elf_info[ei_index++] = id; \
205 		elf_info[ei_index++] = val; \
206 	} while (0)
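
	/*
	 * Each NEW_AUX_ENT() appends one auxiliary vector entry as an
	 * (id, value) pair of elf_addr_t words, e.g.
	 *
	 *	[AT_HWCAP][hwcap] [AT_PAGESZ][pagesz] ... [AT_NULL][0]
	 *
	 * The finished vector is copied to the user stack below, where
	 * the dynamic linker walks it until the AT_NULL terminator.
	 */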
207 
208 #ifdef ARCH_DLINFO
209 	/*
210 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
211 	 * AUXV.
212 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
213 	 * ARCH_DLINFO changes
214 	 */
215 	ARCH_DLINFO;
216 #endif
217 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
218 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
219 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
220 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
221 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
222 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
223 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
224 	NEW_AUX_ENT(AT_FLAGS, 0);
225 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
226 	NEW_AUX_ENT(AT_UID, tsk->uid);
227 	NEW_AUX_ENT(AT_EUID, tsk->euid);
228 	NEW_AUX_ENT(AT_GID, tsk->gid);
229 	NEW_AUX_ENT(AT_EGID, tsk->egid);
230  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
232 	if (k_platform) {
233 		NEW_AUX_ENT(AT_PLATFORM,
234 			    (elf_addr_t)(unsigned long)u_platform);
235 	}
236 	if (k_base_platform) {
237 		NEW_AUX_ENT(AT_BASE_PLATFORM,
238 			    (elf_addr_t)(unsigned long)u_base_platform);
239 	}
240 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
241 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
242 	}
243 #undef NEW_AUX_ENT
244 	/* AT_NULL is zero; clear the rest too */
245 	memset(&elf_info[ei_index], 0,
246 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
247 
248 	/* And advance past the AT_NULL entry.  */
249 	ei_index += 2;
250 
251 	sp = STACK_ADD(p, ei_index);
252 
253 	items = (argc + 1) + (envc + 1) + 1;
254 	bprm->p = STACK_ROUND(sp, items);
255 
256 	/* Point sp at the lowest address on the stack */
257 #ifdef CONFIG_STACK_GROWSUP
258 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
259 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
260 #else
261 	sp = (elf_addr_t __user *)bprm->p;
262 #endif
263 
264 
265 	/*
266 	 * Grow the stack manually; some architectures have a limit on how
267 	 * far ahead a user-space access may be in order to grow the stack.
268 	 */
269 	vma = find_extend_vma(current->mm, bprm->p);
270 	if (!vma)
271 		return -EFAULT;
272 
273 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
274 	if (__put_user(argc, sp++))
275 		return -EFAULT;
276 	argv = sp;
277 	envp = argv + argc + 1;
278 
279 	/* Populate argv and envp */
280 	p = current->mm->arg_end = current->mm->arg_start;
281 	while (argc-- > 0) {
282 		size_t len;
283 		if (__put_user((elf_addr_t)p, argv++))
284 			return -EFAULT;
285 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
286 		if (!len || len > MAX_ARG_STRLEN)
287 			return -EINVAL;
288 		p += len;
289 	}
290 	if (__put_user(0, argv))
291 		return -EFAULT;
292 	current->mm->arg_end = current->mm->env_start = p;
293 	while (envc-- > 0) {
294 		size_t len;
295 		if (__put_user((elf_addr_t)p, envp++))
296 			return -EFAULT;
297 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298 		if (!len || len > MAX_ARG_STRLEN)
299 			return -EINVAL;
300 		p += len;
301 	}
302 	if (__put_user(0, envp))
303 		return -EFAULT;
304 	current->mm->env_end = p;
305 
306 	/* Put the elf_info on the stack in the right place.  */
307 	sp = (elf_addr_t __user *)envp + 1;
308 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
309 		return -EFAULT;
310 	return 0;
311 }
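
/*
 * After create_elf_tables() the new user stack looks roughly like this
 * (lowest address first, on a grows-down stack):
 *
 *	sp -> [argc]
 *	      [argv[0]] ... [argv[argc - 1]] [NULL]
 *	      [envp[0]] ... [NULL]
 *	      [auxv (id, value) pairs ...] [AT_NULL, 0]
 *	      (argument/environment/platform strings live higher up)
 */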
312 
313 #ifndef elf_map
314 
315 static unsigned long elf_map(struct file *filep, unsigned long addr,
316 		struct elf_phdr *eppnt, int prot, int type,
317 		unsigned long total_size)
318 {
319 	unsigned long map_addr;
320 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
321 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
322 	addr = ELF_PAGESTART(addr);
323 	size = ELF_PAGEALIGN(size);
324 
325 	/* mmap() will return -EINVAL if given a zero size, but a
326 	 * segment with zero filesize is perfectly valid */
327 	if (!size)
328 		return addr;
329 
330 	down_write(&current->mm->mmap_sem);
331 	/*
332 	 * total_size is the size of the whole ELF (interpreter) image.
333 	 * The _first_ mmap needs to know the full size, otherwise
334 	 * randomization might put this image into an overlapping
335 	 * position with the ELF binary image (since size < total_size).
336 	 * So we first map the 'big' image - and then unmap the remainder
337 	 * at the end (the unmap is needed for ELF images with holes).
338 	 */
339 	if (total_size) {
340 		total_size = ELF_PAGEALIGN(total_size);
341 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
342 		if (!BAD_ADDR(map_addr))
343 			do_munmap(current->mm, map_addr+size, total_size-size);
344 	} else
345 		map_addr = do_mmap(filep, addr, size, prot, type, off);
346 
347 	up_write(&current->mm->mmap_sem);
348 	return map_addr;
349 }
350 
351 #endif /* !elf_map */
352 
353 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
354 {
355 	int i, first_idx = -1, last_idx = -1;
356 
357 	for (i = 0; i < nr; i++) {
358 		if (cmds[i].p_type == PT_LOAD) {
359 			last_idx = i;
360 			if (first_idx == -1)
361 				first_idx = i;
362 		}
363 	}
364 	if (first_idx == -1)
365 		return 0;
366 
367 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
368 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
369 }
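
/*
 * Example: with two PT_LOAD entries, one at p_vaddr 0x0000 (p_memsz
 * 0x1000) and one at p_vaddr 0x2e10 (p_memsz 0x430), this returns
 * 0x2e10 + 0x430 - 0x0 = 0x3240: the whole span that the first mmap
 * of the interpreter must reserve.
 */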
370 
371 
372 /* This is much more generalized than the library routine read function,
373    so we keep this separate.  Technically the library read function
374    is only provided so that we can read a.out libraries that have
375    an ELF header. */
376 
377 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
378 		struct file *interpreter, unsigned long *interp_map_addr,
379 		unsigned long no_base)
380 {
381 	struct elf_phdr *elf_phdata;
382 	struct elf_phdr *eppnt;
383 	unsigned long load_addr = 0;
384 	int load_addr_set = 0;
385 	unsigned long last_bss = 0, elf_bss = 0;
386 	unsigned long error = ~0UL;
387 	unsigned long total_size;
388 	int retval, i, size;
389 
390 	/* First of all, some simple consistency checks */
391 	if (interp_elf_ex->e_type != ET_EXEC &&
392 	    interp_elf_ex->e_type != ET_DYN)
393 		goto out;
394 	if (!elf_check_arch(interp_elf_ex))
395 		goto out;
396 	if (!interpreter->f_op || !interpreter->f_op->mmap)
397 		goto out;
398 
399 	/*
400 	 * If the size of this structure has changed, then punt, since
401 	 * we will be doing the wrong thing.
402 	 */
403 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
404 		goto out;
405 	if (interp_elf_ex->e_phnum < 1 ||
406 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
407 		goto out;
408 
409 	/* Now read in all of the header information */
410 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
411 	if (size > ELF_MIN_ALIGN)
412 		goto out;
413 	elf_phdata = kmalloc(size, GFP_KERNEL);
414 	if (!elf_phdata)
415 		goto out;
416 
417 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
418 			     (char *)elf_phdata,size);
419 	error = -EIO;
420 	if (retval != size) {
421 		if (retval < 0)
422 			error = retval;
423 		goto out_close;
424 	}
425 
426 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
427 	if (!total_size) {
428 		error = -EINVAL;
429 		goto out_close;
430 	}
431 
432 	eppnt = elf_phdata;
433 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
434 		if (eppnt->p_type == PT_LOAD) {
435 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
436 			int elf_prot = 0;
437 			unsigned long vaddr = 0;
438 			unsigned long k, map_addr;
439 
440 			if (eppnt->p_flags & PF_R)
441 				elf_prot = PROT_READ;
442 			if (eppnt->p_flags & PF_W)
443 				elf_prot |= PROT_WRITE;
444 			if (eppnt->p_flags & PF_X)
445 				elf_prot |= PROT_EXEC;
446 			vaddr = eppnt->p_vaddr;
447 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
448 				elf_type |= MAP_FIXED;
449 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
450 				load_addr = -vaddr;
451 
452 			map_addr = elf_map(interpreter, load_addr + vaddr,
453 					eppnt, elf_prot, elf_type, total_size);
454 			total_size = 0;
455 			if (!*interp_map_addr)
456 				*interp_map_addr = map_addr;
457 			error = map_addr;
458 			if (BAD_ADDR(map_addr))
459 				goto out_close;
460 
461 			if (!load_addr_set &&
462 			    interp_elf_ex->e_type == ET_DYN) {
463 				load_addr = map_addr - ELF_PAGESTART(vaddr);
464 				load_addr_set = 1;
465 			}
466 
467 			/*
468 			 * Check to see if the section's size will overflow the
469 			 * allowed task size. Note that p_filesz must always be
470 	 * <= p_memsz, so it is only necessary to check p_memsz.
471 			 */
472 			k = load_addr + eppnt->p_vaddr;
473 			if (BAD_ADDR(k) ||
474 			    eppnt->p_filesz > eppnt->p_memsz ||
475 			    eppnt->p_memsz > TASK_SIZE ||
476 			    TASK_SIZE - eppnt->p_memsz < k) {
477 				error = -ENOMEM;
478 				goto out_close;
479 			}
480 
481 			/*
482 			 * Find the end of the file mapping for this phdr, and
483 			 * keep track of the largest address we see for this.
484 			 */
485 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
486 			if (k > elf_bss)
487 				elf_bss = k;
488 
489 			/*
490 			 * Do the same thing for the memory mapping - between
491 			 * elf_bss and last_bss is the bss section.
492 			 */
493 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
494 			if (k > last_bss)
495 				last_bss = k;
496 		}
497 	}
498 
499 	/*
500 	 * Now fill out the bss section.  First pad the last page up
501 	 * to the page boundary, and then perform a mmap to make sure
502 	 * that there are zero-mapped pages up to and including the
503 	 * last bss page.
504 	 */
505 	if (padzero(elf_bss)) {
506 		error = -EFAULT;
507 		goto out_close;
508 	}
509 
510 	/* What we have mapped so far */
511 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
512 
513 	/* Map the last of the bss segment */
514 	if (last_bss > elf_bss) {
515 		down_write(&current->mm->mmap_sem);
516 		error = do_brk(elf_bss, last_bss - elf_bss);
517 		up_write(&current->mm->mmap_sem);
518 		if (BAD_ADDR(error))
519 			goto out_close;
520 	}
521 
522 	error = load_addr;
523 
524 out_close:
525 	kfree(elf_phdata);
526 out:
527 	return error;
528 }
529 
530 /*
531  * These are the functions used to load ELF style executables and shared
532  * libraries.  There is no binary dependent code anywhere else.
533  */
534 
535 #define INTERPRETER_NONE 0
536 #define INTERPRETER_ELF 2
537 
538 #ifndef STACK_RND_MASK
539 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
540 #endif
541 
542 static unsigned long randomize_stack_top(unsigned long stack_top)
543 {
544 	unsigned int random_variable = 0;
545 
546 	if ((current->flags & PF_RANDOMIZE) &&
547 		!(current->personality & ADDR_NO_RANDOMIZE)) {
548 		random_variable = get_random_int() & STACK_RND_MASK;
549 		random_variable <<= PAGE_SHIFT;
550 	}
551 #ifdef CONFIG_STACK_GROWSUP
552 	return PAGE_ALIGN(stack_top) + random_variable;
553 #else
554 	return PAGE_ALIGN(stack_top) - random_variable;
555 #endif
556 }
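
/*
 * With the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT of 12, the
 * offset is at most 0x7ff << 12 bytes, i.e. up to 8MB of the stack
 * top is randomized away.
 */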
557 
558 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
559 {
560 	struct file *interpreter = NULL; /* to shut gcc up */
561  	unsigned long load_addr = 0, load_bias = 0;
562 	int load_addr_set = 0;
563 	char * elf_interpreter = NULL;
564 	unsigned long error;
565 	struct elf_phdr *elf_ppnt, *elf_phdata;
566 	unsigned long elf_bss, elf_brk;
567 	int elf_exec_fileno;
568 	int retval, i;
569 	unsigned int size;
570 	unsigned long elf_entry;
571 	unsigned long interp_load_addr = 0;
572 	unsigned long start_code, end_code, start_data, end_data;
573 	unsigned long reloc_func_desc = 0;
574 	int executable_stack = EXSTACK_DEFAULT;
575 	unsigned long def_flags = 0;
576 	struct {
577 		struct elfhdr elf_ex;
578 		struct elfhdr interp_elf_ex;
579 	} *loc;
580 
581 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582 	if (!loc) {
583 		retval = -ENOMEM;
584 		goto out_ret;
585 	}
586 
587 	/* Get the exec-header */
588 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
589 
590 	retval = -ENOEXEC;
591 	/* First of all, some simple consistency checks */
592 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593 		goto out;
594 
595 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596 		goto out;
597 	if (!elf_check_arch(&loc->elf_ex))
598 		goto out;
599 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
600 		goto out;
601 
602 	/* Now read in all of the header information */
603 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604 		goto out;
605 	if (loc->elf_ex.e_phnum < 1 ||
606 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607 		goto out;
608 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609 	retval = -ENOMEM;
610 	elf_phdata = kmalloc(size, GFP_KERNEL);
611 	if (!elf_phdata)
612 		goto out;
613 
614 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615 			     (char *)elf_phdata, size);
616 	if (retval != size) {
617 		if (retval >= 0)
618 			retval = -EIO;
619 		goto out_free_ph;
620 	}
621 
622 	retval = get_unused_fd();
623 	if (retval < 0)
624 		goto out_free_ph;
625 	get_file(bprm->file);
626 	fd_install(elf_exec_fileno = retval, bprm->file);
627 
628 	elf_ppnt = elf_phdata;
629 	elf_bss = 0;
630 	elf_brk = 0;
631 
632 	start_code = ~0UL;
633 	end_code = 0;
634 	start_data = 0;
635 	end_data = 0;
636 
637 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
638 		if (elf_ppnt->p_type == PT_INTERP) {
639 			/* This is the program interpreter used for
640 			 * shared libraries - for now assume that this
641 			 * is an a.out format binary
642 			 */
643 			retval = -ENOEXEC;
644 			if (elf_ppnt->p_filesz > PATH_MAX ||
645 			    elf_ppnt->p_filesz < 2)
646 				goto out_free_file;
647 
648 			retval = -ENOMEM;
649 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
650 						  GFP_KERNEL);
651 			if (!elf_interpreter)
652 				goto out_free_file;
653 
654 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
655 					     elf_interpreter,
656 					     elf_ppnt->p_filesz);
657 			if (retval != elf_ppnt->p_filesz) {
658 				if (retval >= 0)
659 					retval = -EIO;
660 				goto out_free_interp;
661 			}
662 			/* make sure path is NULL terminated */
663 			retval = -ENOEXEC;
664 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
665 				goto out_free_interp;
666 
667 			/*
668 			 * The early SET_PERSONALITY here is so that the lookup
669 			 * for the interpreter happens in the namespace of the
670 			 * to-be-execed image.  SET_PERSONALITY can select an
671 			 * alternate root.
672 			 *
673 			 * However, SET_PERSONALITY is NOT allowed to switch
674 			 * this task into the new image's memory mapping
675 			 * policy - that is, TASK_SIZE must still evaluate to
676 			 * that which is appropriate to the execing application.
677 			 * This is because exit_mmap() needs to have TASK_SIZE
678 			 * evaluate to the size of the old image.
679 			 *
680 			 * So if (say) a 64-bit application is execing a 32-bit
681 			 * application it is the architecture's responsibility
682 			 * to defer changing the value of TASK_SIZE until the
683 			 * switch really is going to happen - do this in
684 			 * flush_thread().	- akpm
685 			 */
686 			SET_PERSONALITY(loc->elf_ex);
687 
688 			interpreter = open_exec(elf_interpreter);
689 			retval = PTR_ERR(interpreter);
690 			if (IS_ERR(interpreter))
691 				goto out_free_interp;
692 
693 			/*
694 			 * If the binary is not readable then enforce
695 			 * mm->dumpable = 0 regardless of the interpreter's
696 			 * permissions.
697 			 */
698 			if (file_permission(interpreter, MAY_READ) < 0)
699 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
700 
701 			retval = kernel_read(interpreter, 0, bprm->buf,
702 					     BINPRM_BUF_SIZE);
703 			if (retval != BINPRM_BUF_SIZE) {
704 				if (retval >= 0)
705 					retval = -EIO;
706 				goto out_free_dentry;
707 			}
708 
709 			/* Get the exec headers */
710 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
711 			break;
712 		}
713 		elf_ppnt++;
714 	}
715 
716 	elf_ppnt = elf_phdata;
717 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
718 		if (elf_ppnt->p_type == PT_GNU_STACK) {
719 			if (elf_ppnt->p_flags & PF_X)
720 				executable_stack = EXSTACK_ENABLE_X;
721 			else
722 				executable_stack = EXSTACK_DISABLE_X;
723 			break;
724 		}
725 
726 	/* Some simple consistency checks for the interpreter */
727 	if (elf_interpreter) {
728 		retval = -ELIBBAD;
729 		/* Not an ELF interpreter */
730 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
731 			goto out_free_dentry;
732 		/* Verify the interpreter has a valid arch */
733 		if (!elf_check_arch(&loc->interp_elf_ex))
734 			goto out_free_dentry;
735 	} else {
736 		/* Executables without an interpreter also need a personality  */
737 		SET_PERSONALITY(loc->elf_ex);
738 	}
739 
740 	/* Flush all traces of the currently running executable */
741 	retval = flush_old_exec(bprm);
742 	if (retval)
743 		goto out_free_dentry;
744 
745 	/* OK, This is the point of no return */
746 	current->flags &= ~PF_FORKNOEXEC;
747 	current->mm->def_flags = def_flags;
748 
749 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
750 	   may depend on the personality.  */
751 	SET_PERSONALITY(loc->elf_ex);
752 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
753 		current->personality |= READ_IMPLIES_EXEC;
754 
755 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
756 		current->flags |= PF_RANDOMIZE;
757 	arch_pick_mmap_layout(current->mm);
758 
759 	/* Do this so that we can load the interpreter, if need be.  We will
760 	   change some of these later */
761 	current->mm->free_area_cache = current->mm->mmap_base;
762 	current->mm->cached_hole_size = 0;
763 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
764 				 executable_stack);
765 	if (retval < 0) {
766 		send_sig(SIGKILL, current, 0);
767 		goto out_free_dentry;
768 	}
769 
770 	current->mm->start_stack = bprm->p;
771 
772 	/* Now we do a little grungy work by mmaping the ELF image into
773 	   the correct location in memory. */
774 	for(i = 0, elf_ppnt = elf_phdata;
775 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
776 		int elf_prot = 0, elf_flags;
777 		unsigned long k, vaddr;
778 
779 		if (elf_ppnt->p_type != PT_LOAD)
780 			continue;
781 
782 		if (unlikely (elf_brk > elf_bss)) {
783 			unsigned long nbyte;
784 
785 			/* There was a PT_LOAD segment with p_memsz > p_filesz
786 			   before this one. Map anonymous pages, if needed,
787 			   and clear the area.  */
788 			retval = set_brk (elf_bss + load_bias,
789 					  elf_brk + load_bias);
790 			if (retval) {
791 				send_sig(SIGKILL, current, 0);
792 				goto out_free_dentry;
793 			}
794 			nbyte = ELF_PAGEOFFSET(elf_bss);
795 			if (nbyte) {
796 				nbyte = ELF_MIN_ALIGN - nbyte;
797 				if (nbyte > elf_brk - elf_bss)
798 					nbyte = elf_brk - elf_bss;
799 				if (clear_user((void __user *)elf_bss +
800 							load_bias, nbyte)) {
801 					/*
802 					 * This bss-zeroing can fail if the ELF
803 					 * file specifies odd protections. So
804 					 * we don't check the return value
805 					 */
806 				}
807 			}
808 		}
809 
810 		if (elf_ppnt->p_flags & PF_R)
811 			elf_prot |= PROT_READ;
812 		if (elf_ppnt->p_flags & PF_W)
813 			elf_prot |= PROT_WRITE;
814 		if (elf_ppnt->p_flags & PF_X)
815 			elf_prot |= PROT_EXEC;
816 
817 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
818 
819 		vaddr = elf_ppnt->p_vaddr;
820 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
821 			elf_flags |= MAP_FIXED;
822 		} else if (loc->elf_ex.e_type == ET_DYN) {
823 			/* Try and get dynamic programs out of the way of the
824 			 * default mmap base, as well as whatever program they
825 			 * might try to exec.  This is because the brk will
826 			 * follow the loader, and is not movable.  */
827 #ifdef CONFIG_X86
828 			load_bias = 0;
829 #else
830 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
831 #endif
832 		}
833 
834 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
835 				elf_prot, elf_flags, 0);
836 		if (BAD_ADDR(error)) {
837 			send_sig(SIGKILL, current, 0);
838 			retval = IS_ERR((void *)error) ?
839 				PTR_ERR((void*)error) : -EINVAL;
840 			goto out_free_dentry;
841 		}
842 
843 		if (!load_addr_set) {
844 			load_addr_set = 1;
845 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
846 			if (loc->elf_ex.e_type == ET_DYN) {
847 				load_bias += error -
848 				             ELF_PAGESTART(load_bias + vaddr);
849 				load_addr += load_bias;
850 				reloc_func_desc = load_bias;
851 			}
852 		}
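		/*
		 * For ET_DYN objects load_bias is the delta between the
		 * link-time p_vaddr and where the first segment actually
		 * landed; every subsequent segment, the entry point and
		 * the brk are shifted by that same amount.  For ET_EXEC
		 * it stays zero.
		 */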
853 		k = elf_ppnt->p_vaddr;
854 		if (k < start_code)
855 			start_code = k;
856 		if (start_data < k)
857 			start_data = k;
858 
859 		/*
860 		 * Check to see if the section's size will overflow the
861 		 * allowed task size. Note that p_filesz must always be
862 		 * <= p_memsz so it is only necessary to check p_memsz.
863 		 */
864 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
865 		    elf_ppnt->p_memsz > TASK_SIZE ||
866 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
867 			/* set_brk can never work. Avoid overflows. */
868 			send_sig(SIGKILL, current, 0);
869 			retval = -EINVAL;
870 			goto out_free_dentry;
871 		}
872 
873 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
874 
875 		if (k > elf_bss)
876 			elf_bss = k;
877 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
878 			end_code = k;
879 		if (end_data < k)
880 			end_data = k;
881 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
882 		if (k > elf_brk)
883 			elf_brk = k;
884 	}
885 
886 	loc->elf_ex.e_entry += load_bias;
887 	elf_bss += load_bias;
888 	elf_brk += load_bias;
889 	start_code += load_bias;
890 	end_code += load_bias;
891 	start_data += load_bias;
892 	end_data += load_bias;
893 
894 	/* Calling set_brk effectively mmaps the pages that we need
895 	 * for the bss and break sections.  We must do this before
896 	 * mapping in the interpreter, to make sure it doesn't wind
897 	 * up getting placed where the bss needs to go.
898 	 */
899 	retval = set_brk(elf_bss, elf_brk);
900 	if (retval) {
901 		send_sig(SIGKILL, current, 0);
902 		goto out_free_dentry;
903 	}
904 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
905 		send_sig(SIGSEGV, current, 0);
906 		retval = -EFAULT; /* Nobody gets to see this, but.. */
907 		goto out_free_dentry;
908 	}
909 
910 	if (elf_interpreter) {
911 		unsigned long uninitialized_var(interp_map_addr);
912 
913 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
914 					    interpreter,
915 					    &interp_map_addr,
916 					    load_bias);
917 		if (!IS_ERR((void *)elf_entry)) {
918 			/*
919 			 * load_elf_interp() returns relocation
920 			 * adjustment
921 			 */
922 			interp_load_addr = elf_entry;
923 			elf_entry += loc->interp_elf_ex.e_entry;
924 		}
925 		if (BAD_ADDR(elf_entry)) {
926 			force_sig(SIGSEGV, current);
927 			retval = IS_ERR((void *)elf_entry) ?
928 					(int)elf_entry : -EINVAL;
929 			goto out_free_dentry;
930 		}
931 		reloc_func_desc = interp_load_addr;
932 
933 		allow_write_access(interpreter);
934 		fput(interpreter);
935 		kfree(elf_interpreter);
936 	} else {
937 		elf_entry = loc->elf_ex.e_entry;
938 		if (BAD_ADDR(elf_entry)) {
939 			force_sig(SIGSEGV, current);
940 			retval = -EINVAL;
941 			goto out_free_dentry;
942 		}
943 	}
944 
945 	kfree(elf_phdata);
946 
947 	sys_close(elf_exec_fileno);
948 
949 	set_binfmt(&elf_format);
950 
951 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
952 	retval = arch_setup_additional_pages(bprm, executable_stack);
953 	if (retval < 0) {
954 		send_sig(SIGKILL, current, 0);
955 		goto out;
956 	}
957 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
958 
959 	compute_creds(bprm);
960 	current->flags &= ~PF_FORKNOEXEC;
961 	retval = create_elf_tables(bprm, &loc->elf_ex,
962 			  load_addr, interp_load_addr);
963 	if (retval < 0) {
964 		send_sig(SIGKILL, current, 0);
965 		goto out;
966 	}
967 	/* N.B. passed_fileno might not be initialized? */
968 	current->mm->end_code = end_code;
969 	current->mm->start_code = start_code;
970 	current->mm->start_data = start_data;
971 	current->mm->end_data = end_data;
972 	current->mm->start_stack = bprm->p;
973 
974 #ifdef arch_randomize_brk
975 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
976 		current->mm->brk = current->mm->start_brk =
977 			arch_randomize_brk(current->mm);
978 #endif
979 
980 	if (current->personality & MMAP_PAGE_ZERO) {
981 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
982 		   and some applications "depend" upon this behavior.
983 		   Since we do not have the power to recompile these, we
984 		   emulate the SVr4 behavior. Sigh. */
985 		down_write(&current->mm->mmap_sem);
986 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
987 				MAP_FIXED | MAP_PRIVATE, 0);
988 		up_write(&current->mm->mmap_sem);
989 	}
990 
991 #ifdef ELF_PLAT_INIT
992 	/*
993 	 * The ABI may specify that certain registers be set up in special
994 	 * ways (on i386 %edx is the address of a DT_FINI function, for
995 	 * example).  In addition, it may also specify (e.g. for PowerPC64 ELF)
996 	 * that the e_entry field is the address of the function descriptor
997 	 * for the startup routine, rather than the address of the startup
998 	 * routine itself.  This macro performs whatever initialization of
999 	 * the regs structure is required, as well as any relocations to the
1000 	 * function descriptor entries when executing dynamically linked apps.
1001 	 */
1002 	ELF_PLAT_INIT(regs, reloc_func_desc);
1003 #endif
1004 
1005 	start_thread(regs, elf_entry, bprm->p);
1006 	retval = 0;
1007 out:
1008 	kfree(loc);
1009 out_ret:
1010 	return retval;
1011 
1012 	/* error cleanup */
1013 out_free_dentry:
1014 	allow_write_access(interpreter);
1015 	if (interpreter)
1016 		fput(interpreter);
1017 out_free_interp:
1018 	kfree(elf_interpreter);
1019 out_free_file:
1020 	sys_close(elf_exec_fileno);
1021 out_free_ph:
1022 	kfree(elf_phdata);
1023 	goto out;
1024 }
1025 
1026 /* This is really simpleminded and specialized - we are loading an
1027    a.out library that is given an ELF header. */
1028 static int load_elf_library(struct file *file)
1029 {
1030 	struct elf_phdr *elf_phdata;
1031 	struct elf_phdr *eppnt;
1032 	unsigned long elf_bss, bss, len;
1033 	int retval, error, i, j;
1034 	struct elfhdr elf_ex;
1035 
1036 	error = -ENOEXEC;
1037 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1038 	if (retval != sizeof(elf_ex))
1039 		goto out;
1040 
1041 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1042 		goto out;
1043 
1044 	/* First of all, some simple consistency checks */
1045 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1046 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1047 		goto out;
1048 
1049 	/* Now read in all of the header information */
1050 
1051 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1052 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1053 
1054 	error = -ENOMEM;
1055 	elf_phdata = kmalloc(j, GFP_KERNEL);
1056 	if (!elf_phdata)
1057 		goto out;
1058 
1059 	eppnt = elf_phdata;
1060 	error = -ENOEXEC;
1061 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1062 	if (retval != j)
1063 		goto out_free_ph;
1064 
1065 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1066 		if ((eppnt + i)->p_type == PT_LOAD)
1067 			j++;
1068 	if (j != 1)
1069 		goto out_free_ph;
1070 
1071 	while (eppnt->p_type != PT_LOAD)
1072 		eppnt++;
1073 
1074 	/* Now use mmap to map the library into memory. */
1075 	down_write(&current->mm->mmap_sem);
1076 	error = do_mmap(file,
1077 			ELF_PAGESTART(eppnt->p_vaddr),
1078 			(eppnt->p_filesz +
1079 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1080 			PROT_READ | PROT_WRITE | PROT_EXEC,
1081 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1082 			(eppnt->p_offset -
1083 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1084 	up_write(&current->mm->mmap_sem);
1085 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1086 		goto out_free_ph;
1087 
1088 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1089 	if (padzero(elf_bss)) {
1090 		error = -EFAULT;
1091 		goto out_free_ph;
1092 	}
1093 
1094 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1095 			    ELF_MIN_ALIGN - 1);
1096 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1097 	if (bss > len) {
1098 		down_write(&current->mm->mmap_sem);
1099 		do_brk(len, bss - len);
1100 		up_write(&current->mm->mmap_sem);
1101 	}
1102 	error = 0;
1103 
1104 out_free_ph:
1105 	kfree(elf_phdata);
1106 out:
1107 	return error;
1108 }
1109 
1110 /*
1111  * Note that some platforms still use traditional core dumps and not
1112  * the ELF core dump.  Each platform can select it as appropriate.
1113  */
1114 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1115 
1116 /*
1117  * ELF core dumper
1118  *
1119  * Modelled on fs/exec.c:aout_core_dump()
1120  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1121  */
1122 /*
1123  * These are the only things you should do on a core-file: use only these
1124  * functions to write out all the necessary info.
1125  */
1126 static int dump_write(struct file *file, const void *addr, int nr)
1127 {
1128 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1129 }
1130 
1131 static int dump_seek(struct file *file, loff_t off)
1132 {
1133 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1134 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1135 			return 0;
1136 	} else {
1137 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1138 		if (!buf)
1139 			return 0;
1140 		while (off > 0) {
1141 			unsigned long n = off;
1142 			if (n > PAGE_SIZE)
1143 				n = PAGE_SIZE;
1144 			if (!dump_write(file, buf, n)) {
				/* do not leak the zeroed page on the error path */
				free_page((unsigned long)buf);
				return 0;
			}
1146 			off -= n;
1147 		}
1148 		free_page((unsigned long)buf);
1149 	}
1150 	return 1;
1151 }
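
/*
 * The write-zeroes fallback matters when the core file is not seekable,
 * e.g. when core_pattern pipes the dump to a user-space helper; the gap
 * is then filled with explicitly written zero pages instead.
 */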
1152 
1153 /*
1154  * Decide what to dump of a segment, part, all or none.
1155  */
1156 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157 				   unsigned long mm_flags)
1158 {
1159 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1160 
1161 	/* The vma can be set up to tell us the answer directly.  */
1162 	if (vma->vm_flags & VM_ALWAYSDUMP)
1163 		goto whole;
1164 
1165 	/* Hugetlb memory check */
1166 	if (vma->vm_flags & VM_HUGETLB) {
1167 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1168 			goto whole;
1169 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1170 			goto whole;
1171 	}
1172 
1173 	/* Do not dump I/O mapped devices or special mappings */
1174 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1175 		return 0;
1176 
1177 	/* By default, dump shared memory if mapped from an anonymous file. */
1178 	if (vma->vm_flags & VM_SHARED) {
1179 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1180 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1181 			goto whole;
1182 		return 0;
1183 	}
1184 
1185 	/* Dump segments that have been written to.  */
1186 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1187 		goto whole;
1188 	if (vma->vm_file == NULL)
1189 		return 0;
1190 
1191 	if (FILTER(MAPPED_PRIVATE))
1192 		goto whole;
1193 
1194 	/*
1195 	 * If this looks like the beginning of a DSO or executable mapping,
1196 	 * check for an ELF header.  If we find one, dump the first page to
1197 	 * aid in determining what was mapped here.
1198 	 */
1199 	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1200 		u32 __user *header = (u32 __user *) vma->vm_start;
1201 		u32 word;
1202 		/*
1203 		 * Doing it this way gets the constant folded by GCC.
1204 		 */
1205 		union {
1206 			u32 cmp;
1207 			char elfmag[SELFMAG];
1208 		} magic;
1209 		BUILD_BUG_ON(SELFMAG != sizeof word);
1210 		magic.elfmag[EI_MAG0] = ELFMAG0;
1211 		magic.elfmag[EI_MAG1] = ELFMAG1;
1212 		magic.elfmag[EI_MAG2] = ELFMAG2;
1213 		magic.elfmag[EI_MAG3] = ELFMAG3;
1214 		if (get_user(word, header) == 0 && word == magic.cmp)
1215 			return PAGE_SIZE;
1216 	}
1217 
1218 #undef	FILTER
1219 
1220 	return 0;
1221 
1222 whole:
1223 	return vma->vm_end - vma->vm_start;
1224 }
1225 
1226 /* An ELF note in memory */
1227 struct memelfnote
1228 {
1229 	const char *name;
1230 	int type;
1231 	unsigned int datasz;
1232 	void *data;
1233 };
1234 
1235 static int notesize(struct memelfnote *en)
1236 {
1237 	int sz;
1238 
1239 	sz = sizeof(struct elf_note);
1240 	sz += roundup(strlen(en->name) + 1, 4);
1241 	sz += roundup(en->datasz, 4);
1242 
1243 	return sz;
1244 }
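
/*
 * Example: a "CORE"/NT_PRSTATUS note has n_namesz == 5, padded to 8,
 * so it occupies sizeof(struct elf_note) + 8 +
 * roundup(sizeof(struct elf_prstatus), 4) bytes in the file.
 */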
1245 
1246 #define DUMP_WRITE(addr, nr, foffset)	\
1247 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1248 
1249 static int alignfile(struct file *file, loff_t *foffset)
1250 {
1251 	static const char buf[4] = { 0, };
1252 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1253 	return 1;
1254 }
1255 
1256 static int writenote(struct memelfnote *men, struct file *file,
1257 			loff_t *foffset)
1258 {
1259 	struct elf_note en;
1260 	en.n_namesz = strlen(men->name) + 1;
1261 	en.n_descsz = men->datasz;
1262 	en.n_type = men->type;
1263 
1264 	DUMP_WRITE(&en, sizeof(en), foffset);
1265 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1266 	if (!alignfile(file, foffset))
1267 		return 0;
1268 	DUMP_WRITE(men->data, men->datasz, foffset);
1269 	if (!alignfile(file, foffset))
1270 		return 0;
1271 
1272 	return 1;
1273 }
1274 #undef DUMP_WRITE
1275 
1276 #define DUMP_WRITE(addr, nr)	\
1277 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1278 		goto end_coredump;
1279 #define DUMP_SEEK(off)	\
1280 	if (!dump_seek(file, (off))) \
1281 		goto end_coredump;
1282 
1283 static void fill_elf_header(struct elfhdr *elf, int segs,
1284 			    u16 machine, u32 flags, u8 osabi)
1285 {
1286 	memset(elf, 0, sizeof(*elf));
1287 
1288 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1289 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1290 	elf->e_ident[EI_DATA] = ELF_DATA;
1291 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1292 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1293 
1294 	elf->e_type = ET_CORE;
1295 	elf->e_machine = machine;
1296 	elf->e_version = EV_CURRENT;
1297 	elf->e_phoff = sizeof(struct elfhdr);
1298 	elf->e_flags = flags;
1299 	elf->e_ehsize = sizeof(struct elfhdr);
1300 	elf->e_phentsize = sizeof(struct elf_phdr);
1301 	elf->e_phnum = segs;
1302 
1303 	return;
1304 }
1305 
1306 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1307 {
1308 	phdr->p_type = PT_NOTE;
1309 	phdr->p_offset = offset;
1310 	phdr->p_vaddr = 0;
1311 	phdr->p_paddr = 0;
1312 	phdr->p_filesz = sz;
1313 	phdr->p_memsz = 0;
1314 	phdr->p_flags = 0;
1315 	phdr->p_align = 0;
1316 	return;
1317 }
1318 
1319 static void fill_note(struct memelfnote *note, const char *name, int type,
1320 		unsigned int sz, void *data)
1321 {
1322 	note->name = name;
1323 	note->type = type;
1324 	note->datasz = sz;
1325 	note->data = data;
1326 	return;
1327 }
1328 
1329 /*
1330  * fill up all the fields in prstatus from the given task struct, except
1331  * registers which need to be filled up separately.
1332  */
1333 static void fill_prstatus(struct elf_prstatus *prstatus,
1334 		struct task_struct *p, long signr)
1335 {
1336 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1337 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1338 	prstatus->pr_sighold = p->blocked.sig[0];
1339 	prstatus->pr_pid = task_pid_vnr(p);
1340 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1341 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1342 	prstatus->pr_sid = task_session_vnr(p);
1343 	if (thread_group_leader(p)) {
1344 		struct task_cputime cputime;
1345 
1346 		/*
1347 		 * This is the record for the group leader.  It shows the
1348 		 * group-wide total, not its individual thread total.
1349 		 */
1350 		thread_group_cputime(p, &cputime);
1351 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1352 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1353 	} else {
1354 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1355 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1356 	}
1357 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1358 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1359 }
1360 
1361 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1362 		       struct mm_struct *mm)
1363 {
1364 	unsigned int i, len;
1365 
1366 	/* first copy the parameters from user space */
1367 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1368 
1369 	len = mm->arg_end - mm->arg_start;
1370 	if (len >= ELF_PRARGSZ)
1371 		len = ELF_PRARGSZ-1;
1372 	if (copy_from_user(&psinfo->pr_psargs,
1373 		           (const char __user *)mm->arg_start, len))
1374 		return -EFAULT;
1375 	for(i = 0; i < len; i++)
1376 		if (psinfo->pr_psargs[i] == 0)
1377 			psinfo->pr_psargs[i] = ' ';
1378 	psinfo->pr_psargs[len] = 0;
1379 
1380 	psinfo->pr_pid = task_pid_vnr(p);
1381 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1382 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1383 	psinfo->pr_sid = task_session_vnr(p);
1384 
1385 	i = p->state ? ffz(~p->state) + 1 : 0;
1386 	psinfo->pr_state = i;
1387 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1388 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1389 	psinfo->pr_nice = task_nice(p);
1390 	psinfo->pr_flag = p->flags;
1391 	SET_UID(psinfo->pr_uid, p->uid);
1392 	SET_GID(psinfo->pr_gid, p->gid);
1393 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1394 
1395 	return 0;
1396 }
1397 
1398 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1399 {
1400 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1401 	int i = 0;
1402 	do
1403 		i += 2;
1404 	while (auxv[i - 2] != AT_NULL);
1405 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1406 }
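
/*
 * The do-while in fill_auxv_note() counts elf_addr_t words in
 * (id, value) pairs and stops after the pair whose id is AT_NULL, so
 * the terminating entry is included in the note.
 */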
1407 
1408 #ifdef CORE_DUMP_USE_REGSET
1409 #include <linux/regset.h>
1410 
1411 struct elf_thread_core_info {
1412 	struct elf_thread_core_info *next;
1413 	struct task_struct *task;
1414 	struct elf_prstatus prstatus;
1415 	struct memelfnote notes[0];
1416 };
1417 
1418 struct elf_note_info {
1419 	struct elf_thread_core_info *thread;
1420 	struct memelfnote psinfo;
1421 	struct memelfnote auxv;
1422 	size_t size;
1423 	int thread_notes;
1424 };
1425 
1426 /*
1427  * When a regset has a writeback hook, we call it on each thread before
1428  * dumping user memory.  On register window machines, this makes sure the
1429  * user memory backing the register data is up to date before we read it.
1430  */
1431 static void do_thread_regset_writeback(struct task_struct *task,
1432 				       const struct user_regset *regset)
1433 {
1434 	if (regset->writeback)
1435 		regset->writeback(task, regset, 1);
1436 }
1437 
1438 static int fill_thread_core_info(struct elf_thread_core_info *t,
1439 				 const struct user_regset_view *view,
1440 				 long signr, size_t *total)
1441 {
1442 	unsigned int i;
1443 
1444 	/*
1445 	 * NT_PRSTATUS is the one special case, because the regset data
1446 	 * goes into the pr_reg field inside the note contents, rather
1447 	 * than being the whole note contents.  We fill the rest in here.
1448 	 * We assume that regset 0 is NT_PRSTATUS.
1449 	 */
1450 	fill_prstatus(&t->prstatus, t->task, signr);
1451 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1452 				    0, sizeof(t->prstatus.pr_reg),
1453 				    &t->prstatus.pr_reg, NULL);
1454 
1455 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1456 		  sizeof(t->prstatus), &t->prstatus);
1457 	*total += notesize(&t->notes[0]);
1458 
1459 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1460 
1461 	/*
1462 	 * Each other regset might generate a note too.  For each regset
1463 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1464 	 * all zero and we'll know to skip writing it later.
1465 	 */
1466 	for (i = 1; i < view->n; ++i) {
1467 		const struct user_regset *regset = &view->regsets[i];
1468 		do_thread_regset_writeback(t->task, regset);
1469 		if (regset->core_note_type &&
1470 		    (!regset->active || regset->active(t->task, regset))) {
1471 			int ret;
1472 			size_t size = regset->n * regset->size;
1473 			void *data = kmalloc(size, GFP_KERNEL);
1474 			if (unlikely(!data))
1475 				return 0;
1476 			ret = regset->get(t->task, regset,
1477 					  0, size, data, NULL);
1478 			if (unlikely(ret))
1479 				kfree(data);
1480 			else {
1481 				if (regset->core_note_type != NT_PRFPREG)
1482 					fill_note(&t->notes[i], "LINUX",
1483 						  regset->core_note_type,
1484 						  size, data);
1485 				else {
1486 					t->prstatus.pr_fpvalid = 1;
1487 					fill_note(&t->notes[i], "CORE",
1488 						  NT_PRFPREG, size, data);
1489 				}
1490 				*total += notesize(&t->notes[i]);
1491 			}
1492 		}
1493 	}
1494 
1495 	return 1;
1496 }
1497 
1498 static int fill_note_info(struct elfhdr *elf, int phdrs,
1499 			  struct elf_note_info *info,
1500 			  long signr, struct pt_regs *regs)
1501 {
1502 	struct task_struct *dump_task = current;
1503 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1504 	struct elf_thread_core_info *t;
1505 	struct elf_prpsinfo *psinfo;
1506 	struct core_thread *ct;
1507 	unsigned int i;
1508 
1509 	info->size = 0;
1510 	info->thread = NULL;
1511 
1512 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1513 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1514 
1515 	if (psinfo == NULL)
1516 		return 0;
1517 
1518 	/*
1519 	 * Figure out how many notes we're going to need for each thread.
1520 	 */
1521 	info->thread_notes = 0;
1522 	for (i = 0; i < view->n; ++i)
1523 		if (view->regsets[i].core_note_type != 0)
1524 			++info->thread_notes;
1525 
1526 	/*
1527 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1528 	 * since it is our one special case.
1529 	 */
1530 	if (unlikely(info->thread_notes == 0) ||
1531 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1532 		WARN_ON(1);
1533 		return 0;
1534 	}
1535 
1536 	/*
1537 	 * Initialize the ELF file header.
1538 	 */
1539 	fill_elf_header(elf, phdrs,
1540 			view->e_machine, view->e_flags, view->ei_osabi);
1541 
1542 	/*
1543 	 * Allocate a structure for each thread.
1544 	 */
1545 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1546 		t = kzalloc(offsetof(struct elf_thread_core_info,
1547 				     notes[info->thread_notes]),
1548 			    GFP_KERNEL);
1549 		if (unlikely(!t))
1550 			return 0;
1551 
1552 		t->task = ct->task;
1553 		if (ct->task == dump_task || !info->thread) {
1554 			t->next = info->thread;
1555 			info->thread = t;
1556 		} else {
1557 			/*
1558 			 * Make sure to keep the original task at
1559 			 * the head of the list.
1560 			 */
1561 			t->next = info->thread->next;
1562 			info->thread->next = t;
1563 		}
1564 	}
1565 
1566 	/*
1567 	 * Now fill in each thread's information.
1568 	 */
1569 	for (t = info->thread; t != NULL; t = t->next)
1570 		if (!fill_thread_core_info(t, view, signr, &info->size))
1571 			return 0;
1572 
1573 	/*
1574 	 * Fill in the two process-wide notes.
1575 	 */
1576 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1577 	info->size += notesize(&info->psinfo);
1578 
1579 	fill_auxv_note(&info->auxv, current->mm);
1580 	info->size += notesize(&info->auxv);
1581 
1582 	return 1;
1583 }
1584 
1585 static size_t get_note_info_size(struct elf_note_info *info)
1586 {
1587 	return info->size;
1588 }
1589 
1590 /*
1591  * Write all the notes for each thread.  When writing the first thread, the
1592  * process-wide notes are interleaved after the first thread-specific note.
1593  */
1594 static int write_note_info(struct elf_note_info *info,
1595 			   struct file *file, loff_t *foffset)
1596 {
1597 	bool first = true;
1598 	struct elf_thread_core_info *t = info->thread;
1599 
1600 	do {
1601 		int i;
1602 
1603 		if (!writenote(&t->notes[0], file, foffset))
1604 			return 0;
1605 
1606 		if (first && !writenote(&info->psinfo, file, foffset))
1607 			return 0;
1608 		if (first && !writenote(&info->auxv, file, foffset))
1609 			return 0;
1610 
1611 		for (i = 1; i < info->thread_notes; ++i)
1612 			if (t->notes[i].data &&
1613 			    !writenote(&t->notes[i], file, foffset))
1614 				return 0;
1615 
1616 		first = false;
1617 		t = t->next;
1618 	} while (t);
1619 
1620 	return 1;
1621 }
1622 
1623 static void free_note_info(struct elf_note_info *info)
1624 {
1625 	struct elf_thread_core_info *threads = info->thread;
1626 	while (threads) {
1627 		unsigned int i;
1628 		struct elf_thread_core_info *t = threads;
1629 		threads = t->next;
1630 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1631 		for (i = 1; i < info->thread_notes; ++i)
1632 			kfree(t->notes[i].data);
1633 		kfree(t);
1634 	}
1635 	kfree(info->psinfo.data);
1636 }
1637 
1638 #else
1639 
1640 /* Here is the structure in which status of each thread is captured. */
1641 struct elf_thread_status
1642 {
1643 	struct list_head list;
1644 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1645 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1646 	struct task_struct *thread;
1647 #ifdef ELF_CORE_COPY_XFPREGS
1648 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1649 #endif
1650 	struct memelfnote notes[3];
1651 	int num_notes;
1652 };
1653 
1654 /*
1655  * In order to add the specific thread information for the ELF file format,
1656  * we need to keep a linked list of every thread's pr_status and then create
1657  * a single section for them in the final core file.
1658  */
1659 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1660 {
1661 	int sz = 0;
1662 	struct task_struct *p = t->thread;
1663 	t->num_notes = 0;
1664 
1665 	fill_prstatus(&t->prstatus, p, signr);
1666 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1667 
1668 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1669 		  &(t->prstatus));
1670 	t->num_notes++;
1671 	sz += notesize(&t->notes[0]);
1672 
1673 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1674 								&t->fpu))) {
1675 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1676 			  &(t->fpu));
1677 		t->num_notes++;
1678 		sz += notesize(&t->notes[1]);
1679 	}
1680 
1681 #ifdef ELF_CORE_COPY_XFPREGS
1682 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1683 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1684 			  sizeof(t->xfpu), &t->xfpu);
1685 		t->num_notes++;
1686 		sz += notesize(&t->notes[2]);
1687 	}
1688 #endif
1689 	return sz;
1690 }
1691 
1692 struct elf_note_info {
1693 	struct memelfnote *notes;
1694 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1695 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1696 	struct list_head thread_list;
1697 	elf_fpregset_t *fpu;
1698 #ifdef ELF_CORE_COPY_XFPREGS
1699 	elf_fpxregset_t *xfpu;
1700 #endif
1701 	int thread_status_size;
1702 	int numnote;
1703 };
1704 
1705 static int fill_note_info(struct elfhdr *elf, int phdrs,
1706 			  struct elf_note_info *info,
1707 			  long signr, struct pt_regs *regs)
1708 {
1709 #define	NUM_NOTES	6
1710 	struct list_head *t;
1711 
1712 	info->notes = NULL;
1713 	info->prstatus = NULL;
1714 	info->psinfo = NULL;
1715 	info->fpu = NULL;
1716 #ifdef ELF_CORE_COPY_XFPREGS
1717 	info->xfpu = NULL;
1718 #endif
1719 	INIT_LIST_HEAD(&info->thread_list);
1720 
1721 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1722 			      GFP_KERNEL);
1723 	if (!info->notes)
1724 		return 0;
1725 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1726 	if (!info->psinfo)
1727 		return 0;
1728 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1729 	if (!info->prstatus)
1730 		return 0;
1731 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1732 	if (!info->fpu)
1733 		return 0;
1734 #ifdef ELF_CORE_COPY_XFPREGS
1735 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1736 	if (!info->xfpu)
1737 		return 0;
1738 #endif
1739 
1740 	info->thread_status_size = 0;
1741 	if (signr) {
1742 		struct core_thread *ct;
1743 		struct elf_thread_status *ets;
1744 
1745 		for (ct = current->mm->core_state->dumper.next;
1746 						ct; ct = ct->next) {
1747 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1748 			if (!ets)
1749 				return 0;
1750 
1751 			ets->thread = ct->task;
1752 			list_add(&ets->list, &info->thread_list);
1753 		}
1754 
1755 		list_for_each(t, &info->thread_list) {
1756 			int sz;
1757 
1758 			ets = list_entry(t, struct elf_thread_status, list);
1759 			sz = elf_dump_thread_status(signr, ets);
1760 			info->thread_status_size += sz;
1761 		}
1762 	}
1763 	/* now collect the dump for the current task */
1764 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1765 	fill_prstatus(info->prstatus, current, signr);
1766 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1767 
1768 	/* Set up header */
1769 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1770 
1771 	/*
1772 	 * Set up the notes in similar form to SVR4 core dumps made
1773 	 * with info from their /proc.
1774 	 */
1775 
1776 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1777 		  sizeof(*info->prstatus), info->prstatus);
1778 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1779 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1780 		  sizeof(*info->psinfo), info->psinfo);
1781 
1782 	info->numnote = 2;
1783 
1784 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1785 
1786 	/* Try to dump the FPU. */
1787 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1788 							       info->fpu);
1789 	if (info->prstatus->pr_fpvalid)
1790 		fill_note(info->notes + info->numnote++,
1791 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1792 #ifdef ELF_CORE_COPY_XFPREGS
1793 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1794 		fill_note(info->notes + info->numnote++,
1795 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1796 			  sizeof(*info->xfpu), info->xfpu);
1797 #endif
1798 
1799 	return 1;
1800 
1801 #undef NUM_NOTES
1802 }
1803 
1804 static size_t get_note_info_size(struct elf_note_info *info)
1805 {
1806 	int sz = 0;
1807 	int i;
1808 
1809 	for (i = 0; i < info->numnote; i++)
1810 		sz += notesize(info->notes + i);
1811 
1812 	sz += info->thread_status_size;
1813 
1814 	return sz;
1815 }
1816 
1817 static int write_note_info(struct elf_note_info *info,
1818 			   struct file *file, loff_t *foffset)
1819 {
1820 	int i;
1821 	struct list_head *t;
1822 
1823 	for (i = 0; i < info->numnote; i++)
1824 		if (!writenote(info->notes + i, file, foffset))
1825 			return 0;
1826 
1827 	/* write out the thread status notes section */
1828 	list_for_each(t, &info->thread_list) {
1829 		struct elf_thread_status *tmp =
1830 				list_entry(t, struct elf_thread_status, list);
1831 
1832 		for (i = 0; i < tmp->num_notes; i++)
1833 			if (!writenote(&tmp->notes[i], file, foffset))
1834 				return 0;
1835 	}
1836 
1837 	return 1;
1838 }
1839 
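/* Release everything that fill_note_info() allocated. */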
1840 static void free_note_info(struct elf_note_info *info)
1841 {
1842 	while (!list_empty(&info->thread_list)) {
1843 		struct list_head *tmp = info->thread_list.next;
1844 		list_del(tmp);
1845 		kfree(list_entry(tmp, struct elf_thread_status, list));
1846 	}
1847 
1848 	kfree(info->prstatus);
1849 	kfree(info->psinfo);
1850 	kfree(info->notes);
1851 	kfree(info->fpu);
1852 #ifdef ELF_CORE_COPY_XFPREGS
1853 	kfree(info->xfpu);
1854 #endif
1855 }
1856 
1857 #endif
1858 
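/*
 * Return the first vma to dump: the start of the mm's vma list, or the
 * gate vma when the mm has no mappings at all.
 */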
1859 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1860 					struct vm_area_struct *gate_vma)
1861 {
1862 	struct vm_area_struct *ret = tsk->mm->mmap;
1863 
1864 	if (ret)
1865 		return ret;
1866 	return gate_vma;
1867 }

1868 /*
1869  * Helper function for iterating across a vma list.  It ensures that the caller
1870  * will visit `gate_vma' prior to terminating the search.
1871  */
1872 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1873 					struct vm_area_struct *gate_vma)
1874 {
1875 	struct vm_area_struct *ret;
1876 
1877 	ret = this_vma->vm_next;
1878 	if (ret)
1879 		return ret;
1880 	if (this_vma == gate_vma)
1881 		return NULL;
1882 	return gate_vma;
1883 }
1884 
1885 /*
1886  * Actual dumper
1887  *
1888  * This is a two-pass process; first we find the offsets of the bits,
1889  * and then they are actually written out.  If we run out of the core
1890  * limit, we just truncate.
1891  */
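/*
 * A sketch of the file layout the code below produces (the sizes and
 * offsets are all computed in the first pass):
 *
 *	[ ELF header                                        ]
 *	[ program headers: PT_NOTE, one PT_LOAD per vma,
 *	  plus any arch-specific extras                     ]
 *	[ note data                                         ]
 *	[ padding up to ELF_EXEC_PAGESIZE                   ]
 *	[ contents of each dumped vma, page by page         ]
 */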
1892 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1893 {
1894 	int has_dumped = 0;
1895 	mm_segment_t fs;
1896 	int segs;
1897 	size_t size = 0;
1898 	struct vm_area_struct *vma, *gate_vma;
1899 	struct elfhdr *elf = NULL;
1900 	loff_t offset = 0, dataoff, foffset;
1901 	unsigned long mm_flags;
1902 	struct elf_note_info info;
1903 
1904 	/*
1905 	 * We no longer stop all VM operations.
1906 	 *
1907 	 * This is because those processes that could possibly change map_count
1908 	 * or the mmap / vma pages are now blocked in do_exit until current
1909 	 * finishes this core dump.
1910 	 *
1911 	 * Only ptrace can touch these memory addresses, but it doesn't change
1912 	 * the map_count or the pages allocated. So no possibility of crashing
1913 	 * exists while dumping the mm->vm_next areas to the core file.
1914 	 */
1915 
1916 	/* Allocate memory for data structures too large to live on the stack */
1917 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1918 	if (!elf)
1919 		goto out;
1920 
1921 	segs = current->mm->map_count;
1922 #ifdef ELF_CORE_EXTRA_PHDRS
1923 	segs += ELF_CORE_EXTRA_PHDRS;
1924 #endif
1925 
1926 	gate_vma = get_gate_vma(current);
1927 	if (gate_vma != NULL)
1928 		segs++;
1929 
1930 	/*
1931 	 * Collect all the non-memory information about the process for the
1932 	 * notes.  This also sets up the file header.
1933 	 */
1934 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1935 			    &info, signr, regs))
1936 		goto cleanup;
1937 
1938 	has_dumped = 1;
1939 	current->flags |= PF_DUMPCORE;
1940 
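	/*
	 * The headers and notes live in kernel memory; switching to
	 * KERNEL_DS lets the file's write method accept those pointers.
	 */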
1941 	fs = get_fs();
1942 	set_fs(KERNEL_DS);
1943 
1944 	DUMP_WRITE(elf, sizeof(*elf));
1945 	offset += sizeof(*elf);				/* Elf header */
1946 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1947 	foffset = offset;				/* Note data starts here */
1948 
1949 	/* Write notes phdr entry */
1950 	{
1951 		struct elf_phdr phdr;
1952 		size_t sz = get_note_info_size(&info);
1953 
1954 		sz += elf_coredump_extra_notes_size();
1955 
1956 		fill_elf_note_phdr(&phdr, sz, offset);
1957 		offset += sz;
1958 		DUMP_WRITE(&phdr, sizeof(phdr));
1959 	}
1960 
1961 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1962 
1963 	/*
1964 	 * We must use the same mm->flags while dumping core to avoid
1965 	 * inconsistency between the program headers and bodies, otherwise an
1966 	 * unusable core file can be generated.
1967 	 */
1968 	mm_flags = current->mm->flags;
1969 
1970 	/* Write program headers for segments dump */
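	/*
	 * p_filesz comes from vma_dump_size() and may be smaller than
	 * p_memsz (down to zero) when the coredump filter bits in
	 * mm_flags exclude the vma's contents; the virtual extent is
	 * still described.
	 */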
1971 	for (vma = first_vma(current, gate_vma); vma != NULL;
1972 			vma = next_vma(vma, gate_vma)) {
1973 		struct elf_phdr phdr;
1974 
1975 		phdr.p_type = PT_LOAD;
1976 		phdr.p_offset = offset;
1977 		phdr.p_vaddr = vma->vm_start;
1978 		phdr.p_paddr = 0;
1979 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1980 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1981 		offset += phdr.p_filesz;
1982 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1983 		if (vma->vm_flags & VM_WRITE)
1984 			phdr.p_flags |= PF_W;
1985 		if (vma->vm_flags & VM_EXEC)
1986 			phdr.p_flags |= PF_X;
1987 		phdr.p_align = ELF_EXEC_PAGESIZE;
1988 
1989 		DUMP_WRITE(&phdr, sizeof(phdr));
1990 	}
1991 
1992 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1993 	ELF_CORE_WRITE_EXTRA_PHDRS;
1994 #endif
1995 
1996 	/* Write out the notes section */
1997 	if (!write_note_info(&info, file, &foffset))
1998 		goto end_coredump;
1999 
2000 	if (elf_coredump_extra_notes_write(file, &foffset))
2001 		goto end_coredump;
2002 
2003 	/* Align to page */
2004 	DUMP_SEEK(dataoff - foffset);
2005 
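	/*
	 * Second pass: write the contents of each dumped vma, one page
	 * at a time.  Pages that cannot be brought in, and the shared
	 * zero page, become holes in the file via a seek.
	 */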
2006 	for (vma = first_vma(current, gate_vma); vma != NULL;
2007 			vma = next_vma(vma, gate_vma)) {
2008 		unsigned long addr;
2009 		unsigned long end;
2010 
2011 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2012 
2013 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2014 			struct page *page;
2015 			struct vm_area_struct *tmp_vma;
2016 
2017 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2018 						&page, &tmp_vma) <= 0) {
2019 				DUMP_SEEK(PAGE_SIZE);
2020 			} else {
2021 				if (page == ZERO_PAGE(0)) {
2022 					if (!dump_seek(file, PAGE_SIZE)) {
2023 						page_cache_release(page);
2024 						goto end_coredump;
2025 					}
2026 				} else {
2027 					void *kaddr;
2028 					flush_cache_page(tmp_vma, addr,
2029 							 page_to_pfn(page));
2030 					kaddr = kmap(page);
2031 					if ((size += PAGE_SIZE) > limit ||
2032 					    !dump_write(file, kaddr,
2033 					    PAGE_SIZE)) {
2034 						kunmap(page);
2035 						page_cache_release(page);
2036 						goto end_coredump;
2037 					}
2038 					kunmap(page);
2039 				}
2040 				page_cache_release(page);
2041 			}
2042 		}
2043 	}
2044 
2045 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2046 	ELF_CORE_WRITE_EXTRA_DATA;
2047 #endif
2048 
2049 end_coredump:
2050 	set_fs(fs);
2051 
2052 cleanup:
2053 	free_note_info(&info);
2054 	kfree(elf);
2055 out:
2056 	return has_dumped;
2057 }
2058 
2059 #endif		/* USE_ELF_CORE_DUMP */
2060 
2061 static int __init init_elf_binfmt(void)
2062 {
2063 	return register_binfmt(&elf_format);
2064 }
2065 
2066 static void __exit exit_elf_binfmt(void)
2067 {
2068 	/* Remove the ELF loader. */
2069 	unregister_binfmt(&elf_format);
2070 }
2071 
2072 core_initcall(init_elf_binfmt);
2073 module_exit(exit_elf_binfmt);
2074 MODULE_LICENSE("GPL");
2075