xref: /openbmc/linux/fs/binfmt_elf.c (revision 93dc544c)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 				int, int, unsigned long);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
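/*
 * For example, assuming ELF_MIN_ALIGN == 0x1000 (4 KiB pages):
 *   ELF_PAGESTART(0x12345)  == 0x12000   (start of the containing page)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within that page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (rounded up to the next page boundary)
 */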
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
84 
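/*
 * set_brk() extends the brk area: it maps anonymous zero-filled pages for the
 * page-aligned range [start, end) via do_brk() and records the new program
 * break in current->mm.
 */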
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  These would
103    otherwise contain junk from the file that should
104    not be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
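/*
 * In both cases STACK_ALLOC(sp, len) reserves len bytes and evaluates to the
 * lowest address of the reserved region: on grows-up stacks it returns the old
 * sp and advances sp past the region, on grows-down stacks it moves sp down
 * first and returns the new value.
 */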
133 
134 #ifndef ELF_BASE_PLATFORM
135 /*
136  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138  * will be copied to the user stack in the same manner as AT_PLATFORM.
139  */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142 
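/*
 * create_elf_tables() builds the initial process stack.  On a downward-growing
 * stack the final layout, from low to high addresses starting at bprm->p, is:
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxiliary vector (id/value pairs), terminated by AT_NULL
 *
 * with the platform strings and the argument/environment strings themselves
 * sitting above this block, near the original stack top.
 */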
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145 		unsigned long load_addr, unsigned long interp_load_addr)
146 {
147 	unsigned long p = bprm->p;
148 	int argc = bprm->argc;
149 	int envc = bprm->envc;
150 	elf_addr_t __user *argv;
151 	elf_addr_t __user *envp;
152 	elf_addr_t __user *sp;
153 	elf_addr_t __user *u_platform;
154 	elf_addr_t __user *u_base_platform;
155 	const char *k_platform = ELF_PLATFORM;
156 	const char *k_base_platform = ELF_BASE_PLATFORM;
157 	int items;
158 	elf_addr_t *elf_info;
159 	int ei_index = 0;
160 	struct task_struct *tsk = current;
161 	struct vm_area_struct *vma;
162 
163 	/*
164 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 	 * evictions by the processes running on the same package. One
166 	 * thing we can do is to shuffle the initial stack for them.
167 	 */
168 
169 	p = arch_align_stack(p);
170 
171 	/*
172 	 * If this architecture has a platform capability string, copy it
173 	 * to userspace.  In some cases (Sparc), this info is impossible
174 	 * for userspace to get any other way, in others (i386) it is
175 	 * merely difficult.
176 	 */
177 	u_platform = NULL;
178 	if (k_platform) {
179 		size_t len = strlen(k_platform) + 1;
180 
181 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
182 		if (__copy_to_user(u_platform, k_platform, len))
183 			return -EFAULT;
184 	}
185 
186 	/*
187 	 * If this architecture has a "base" platform capability
188 	 * string, copy it to userspace.
189 	 */
190 	u_base_platform = NULL;
191 	if (k_base_platform) {
192 		size_t len = strlen(k_base_platform) + 1;
193 
194 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 		if (__copy_to_user(u_base_platform, k_base_platform, len))
196 			return -EFAULT;
197 	}
198 
199 	/* Create the ELF interpreter info */
200 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
201 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
202 #define NEW_AUX_ENT(id, val) \
203 	do { \
204 		elf_info[ei_index++] = id; \
205 		elf_info[ei_index++] = val; \
206 	} while (0)
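/*
 * Each NEW_AUX_ENT() stores one (id, value) pair into current->mm->saved_auxv;
 * the accumulated vector is copied onto the user stack further below.
 */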
207 
208 #ifdef ARCH_DLINFO
209 	/*
210 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
211 	 * AUXV.
212 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
213 	 * ARCH_DLINFO changes
214 	 */
215 	ARCH_DLINFO;
216 #endif
217 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
218 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
219 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
220 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
221 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
222 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
223 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
224 	NEW_AUX_ENT(AT_FLAGS, 0);
225 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
226 	NEW_AUX_ENT(AT_UID, tsk->uid);
227 	NEW_AUX_ENT(AT_EUID, tsk->euid);
228 	NEW_AUX_ENT(AT_GID, tsk->gid);
229 	NEW_AUX_ENT(AT_EGID, tsk->egid);
230  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
232 	if (k_platform) {
233 		NEW_AUX_ENT(AT_PLATFORM,
234 			    (elf_addr_t)(unsigned long)u_platform);
235 	}
236 	if (k_base_platform) {
237 		NEW_AUX_ENT(AT_BASE_PLATFORM,
238 			    (elf_addr_t)(unsigned long)u_base_platform);
239 	}
240 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
241 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
242 	}
243 #undef NEW_AUX_ENT
244 	/* AT_NULL is zero; clear the rest too */
245 	memset(&elf_info[ei_index], 0,
246 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
247 
248 	/* And advance past the AT_NULL entry.  */
249 	ei_index += 2;
250 
251 	sp = STACK_ADD(p, ei_index);
252 
253 	items = (argc + 1) + (envc + 1) + 1;
254 	bprm->p = STACK_ROUND(sp, items);
255 
256 	/* Point sp at the lowest address on the stack */
257 #ifdef CONFIG_STACK_GROWSUP
258 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
259 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
260 #else
261 	sp = (elf_addr_t __user *)bprm->p;
262 #endif
263 
264 
265 	/*
266 	 * Grow the stack manually; some architectures have a limit on how
267 	 * far ahead a user-space access may be in order to grow the stack.
268 	 */
269 	vma = find_extend_vma(current->mm, bprm->p);
270 	if (!vma)
271 		return -EFAULT;
272 
273 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
274 	if (__put_user(argc, sp++))
275 		return -EFAULT;
276 	argv = sp;
277 	envp = argv + argc + 1;
278 
279 	/* Populate argv and envp */
280 	p = current->mm->arg_end = current->mm->arg_start;
281 	while (argc-- > 0) {
282 		size_t len;
283 		if (__put_user((elf_addr_t)p, argv++))
284 			return -EFAULT;
285 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
286 		if (!len || len > MAX_ARG_STRLEN)
287 			return -EINVAL;
288 		p += len;
289 	}
290 	if (__put_user(0, argv))
291 		return -EFAULT;
292 	current->mm->arg_end = current->mm->env_start = p;
293 	while (envc-- > 0) {
294 		size_t len;
295 		if (__put_user((elf_addr_t)p, envp++))
296 			return -EFAULT;
297 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298 		if (!len || len > MAX_ARG_STRLEN)
299 			return -EINVAL;
300 		p += len;
301 	}
302 	if (__put_user(0, envp))
303 		return -EFAULT;
304 	current->mm->env_end = p;
305 
306 	/* Put the elf_info on the stack in the right place.  */
307 	sp = (elf_addr_t __user *)envp + 1;
308 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
309 		return -EFAULT;
310 	return 0;
311 }
312 
313 #ifndef elf_map
314 
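/*
 * Generic mapper for a single PT_LOAD segment; an architecture can provide
 * its own elf_map(), in which case this default is compiled out.
 */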
315 static unsigned long elf_map(struct file *filep, unsigned long addr,
316 		struct elf_phdr *eppnt, int prot, int type,
317 		unsigned long total_size)
318 {
319 	unsigned long map_addr;
320 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
321 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
322 	addr = ELF_PAGESTART(addr);
323 	size = ELF_PAGEALIGN(size);
324 
325 	/* mmap() will return -EINVAL if given a zero size, but a
326 	 * segment with zero filesize is perfectly valid */
327 	if (!size)
328 		return addr;
329 
330 	down_write(&current->mm->mmap_sem);
331 	/*
332 	* total_size is the size of the ELF (interpreter) image.
333 	* The _first_ mmap needs to know the full size, otherwise
334 	* randomization might put this image into an overlapping
335 	* position with the ELF binary image (since size < total_size).
336 	* So we first map the 'big' image and then unmap the remainder at
337 	* the end (this unmapping is needed for ELF images with holes).
338 	*/
339 	if (total_size) {
340 		total_size = ELF_PAGEALIGN(total_size);
341 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
342 		if (!BAD_ADDR(map_addr))
343 			do_munmap(current->mm, map_addr+size, total_size-size);
344 	} else
345 		map_addr = do_mmap(filep, addr, size, prot, type, off);
346 
347 	up_write(&current->mm->mmap_sem);
348 	return map_addr;
349 }
350 
351 #endif /* !elf_map */
352 
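/*
 * Size of the address range covered by the PT_LOAD segments: from the page
 * containing the first loadable segment to the end of the memory image of
 * the last one.  Returns 0 if there are no PT_LOAD segments.
 */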
353 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
354 {
355 	int i, first_idx = -1, last_idx = -1;
356 
357 	for (i = 0; i < nr; i++) {
358 		if (cmds[i].p_type == PT_LOAD) {
359 			last_idx = i;
360 			if (first_idx == -1)
361 				first_idx = i;
362 		}
363 	}
364 	if (first_idx == -1)
365 		return 0;
366 
367 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
368 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
369 }
370 
371 
372 /* This is much more generalized than the library routine read function,
373    so we keep this separate.  Technically the library read function
374    is only provided so that we can read a.out libraries that have
375    an ELF header */
376 
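/*
 * On success load_elf_interp() returns the load bias applied to the
 * interpreter (0 for an ET_EXEC interpreter); the caller adds e_entry to get
 * the entry point.  On failure it returns an error code or ~0UL.
 */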
377 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
378 		struct file *interpreter, unsigned long *interp_map_addr,
379 		unsigned long no_base)
380 {
381 	struct elf_phdr *elf_phdata;
382 	struct elf_phdr *eppnt;
383 	unsigned long load_addr = 0;
384 	int load_addr_set = 0;
385 	unsigned long last_bss = 0, elf_bss = 0;
386 	unsigned long error = ~0UL;
387 	unsigned long total_size;
388 	int retval, i, size;
389 
390 	/* First of all, some simple consistency checks */
391 	if (interp_elf_ex->e_type != ET_EXEC &&
392 	    interp_elf_ex->e_type != ET_DYN)
393 		goto out;
394 	if (!elf_check_arch(interp_elf_ex))
395 		goto out;
396 	if (!interpreter->f_op || !interpreter->f_op->mmap)
397 		goto out;
398 
399 	/*
400 	 * If the size of this structure has changed, then punt, since
401 	 * we will be doing the wrong thing.
402 	 */
403 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
404 		goto out;
405 	if (interp_elf_ex->e_phnum < 1 ||
406 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
407 		goto out;
408 
409 	/* Now read in all of the header information */
410 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
411 	if (size > ELF_MIN_ALIGN)
412 		goto out;
413 	elf_phdata = kmalloc(size, GFP_KERNEL);
414 	if (!elf_phdata)
415 		goto out;
416 
417 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
418 			     (char *)elf_phdata,size);
419 	error = -EIO;
420 	if (retval != size) {
421 		if (retval < 0)
422 			error = retval;
423 		goto out_close;
424 	}
425 
426 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
427 	if (!total_size) {
428 		error = -EINVAL;
429 		goto out_close;
430 	}
431 
432 	eppnt = elf_phdata;
433 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
434 		if (eppnt->p_type == PT_LOAD) {
435 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
436 			int elf_prot = 0;
437 			unsigned long vaddr = 0;
438 			unsigned long k, map_addr;
439 
440 			if (eppnt->p_flags & PF_R)
441 				elf_prot = PROT_READ;
442 			if (eppnt->p_flags & PF_W)
443 				elf_prot |= PROT_WRITE;
444 			if (eppnt->p_flags & PF_X)
445 				elf_prot |= PROT_EXEC;
446 			vaddr = eppnt->p_vaddr;
447 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
448 				elf_type |= MAP_FIXED;
449 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
450 				load_addr = -vaddr;
451 
452 			map_addr = elf_map(interpreter, load_addr + vaddr,
453 					eppnt, elf_prot, elf_type, total_size);
454 			total_size = 0;
455 			if (!*interp_map_addr)
456 				*interp_map_addr = map_addr;
457 			error = map_addr;
458 			if (BAD_ADDR(map_addr))
459 				goto out_close;
460 
461 			if (!load_addr_set &&
462 			    interp_elf_ex->e_type == ET_DYN) {
463 				load_addr = map_addr - ELF_PAGESTART(vaddr);
464 				load_addr_set = 1;
465 			}
466 
467 			/*
468 			 * Check to see if the section's size will overflow the
469 			 * allowed task size. Note that p_filesz must always be
470 			 * <= p_memsz so it's only necessary to check p_memsz.
471 			 */
472 			k = load_addr + eppnt->p_vaddr;
473 			if (BAD_ADDR(k) ||
474 			    eppnt->p_filesz > eppnt->p_memsz ||
475 			    eppnt->p_memsz > TASK_SIZE ||
476 			    TASK_SIZE - eppnt->p_memsz < k) {
477 				error = -ENOMEM;
478 				goto out_close;
479 			}
480 
481 			/*
482 			 * Find the end of the file mapping for this phdr, and
483 			 * keep track of the largest address we see for this.
484 			 */
485 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
486 			if (k > elf_bss)
487 				elf_bss = k;
488 
489 			/*
490 			 * Do the same thing for the memory mapping - between
491 			 * elf_bss and last_bss is the bss section.
492 			 */
493 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
494 			if (k > last_bss)
495 				last_bss = k;
496 		}
497 	}
498 
499 	/*
500 	 * Now fill out the bss section.  First pad the last page up
501 	 * to the page boundary, and then perform a mmap to make sure
502 	 * that there are zero-mapped pages up to and including the
503 	 * last bss page.
504 	 */
505 	if (padzero(elf_bss)) {
506 		error = -EFAULT;
507 		goto out_close;
508 	}
509 
510 	/* What we have mapped so far */
511 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
512 
513 	/* Map the last of the bss segment */
514 	if (last_bss > elf_bss) {
515 		down_write(&current->mm->mmap_sem);
516 		error = do_brk(elf_bss, last_bss - elf_bss);
517 		up_write(&current->mm->mmap_sem);
518 		if (BAD_ADDR(error))
519 			goto out_close;
520 	}
521 
522 	error = load_addr;
523 
524 out_close:
525 	kfree(elf_phdata);
526 out:
527 	return error;
528 }
529 
530 /*
531  * These are the functions used to load ELF style executables and shared
532  * libraries.  There is no binary dependent code anywhere else.
533  */
534 
535 #define INTERPRETER_NONE 0
536 #define INTERPRETER_ELF 2
537 
538 #ifndef STACK_RND_MASK
539 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
540 #endif
541 
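/*
 * With the default STACK_RND_MASK of 0x7ff pages this randomizes the stack
 * top by up to about 8 MiB of virtual address space (in page-sized steps),
 * unless randomization is disabled for the task or by its personality.
 */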
542 static unsigned long randomize_stack_top(unsigned long stack_top)
543 {
544 	unsigned int random_variable = 0;
545 
546 	if ((current->flags & PF_RANDOMIZE) &&
547 		!(current->personality & ADDR_NO_RANDOMIZE)) {
548 		random_variable = get_random_int() & STACK_RND_MASK;
549 		random_variable <<= PAGE_SHIFT;
550 	}
551 #ifdef CONFIG_STACK_GROWSUP
552 	return PAGE_ALIGN(stack_top) + random_variable;
553 #else
554 	return PAGE_ALIGN(stack_top) - random_variable;
555 #endif
556 }
557 
558 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
559 {
560 	struct file *interpreter = NULL; /* to shut gcc up */
561  	unsigned long load_addr = 0, load_bias = 0;
562 	int load_addr_set = 0;
563 	char * elf_interpreter = NULL;
564 	unsigned long error;
565 	struct elf_phdr *elf_ppnt, *elf_phdata;
566 	unsigned long elf_bss, elf_brk;
567 	int elf_exec_fileno;
568 	int retval, i;
569 	unsigned int size;
570 	unsigned long elf_entry;
571 	unsigned long interp_load_addr = 0;
572 	unsigned long start_code, end_code, start_data, end_data;
573 	unsigned long reloc_func_desc = 0;
574 	int executable_stack = EXSTACK_DEFAULT;
575 	unsigned long def_flags = 0;
576 	struct {
577 		struct elfhdr elf_ex;
578 		struct elfhdr interp_elf_ex;
579 	} *loc;
580 
581 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582 	if (!loc) {
583 		retval = -ENOMEM;
584 		goto out_ret;
585 	}
586 
587 	/* Get the exec-header */
588 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
589 
590 	retval = -ENOEXEC;
591 	/* First of all, some simple consistency checks */
592 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593 		goto out;
594 
595 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596 		goto out;
597 	if (!elf_check_arch(&loc->elf_ex))
598 		goto out;
599 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
600 		goto out;
601 
602 	/* Now read in all of the header information */
603 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604 		goto out;
605 	if (loc->elf_ex.e_phnum < 1 ||
606 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607 		goto out;
608 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609 	retval = -ENOMEM;
610 	elf_phdata = kmalloc(size, GFP_KERNEL);
611 	if (!elf_phdata)
612 		goto out;
613 
614 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615 			     (char *)elf_phdata, size);
616 	if (retval != size) {
617 		if (retval >= 0)
618 			retval = -EIO;
619 		goto out_free_ph;
620 	}
621 
622 	retval = get_unused_fd();
623 	if (retval < 0)
624 		goto out_free_ph;
625 	get_file(bprm->file);
626 	fd_install(elf_exec_fileno = retval, bprm->file);
627 
628 	elf_ppnt = elf_phdata;
629 	elf_bss = 0;
630 	elf_brk = 0;
631 
632 	start_code = ~0UL;
633 	end_code = 0;
634 	start_data = 0;
635 	end_data = 0;
636 
637 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
638 		if (elf_ppnt->p_type == PT_INTERP) {
639 			/* This is the program interpreter used for
640 			 * shared libraries - for now assume that this
641 			 * is an a.out format binary
642 			 */
643 			retval = -ENOEXEC;
644 			if (elf_ppnt->p_filesz > PATH_MAX ||
645 			    elf_ppnt->p_filesz < 2)
646 				goto out_free_file;
647 
648 			retval = -ENOMEM;
649 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
650 						  GFP_KERNEL);
651 			if (!elf_interpreter)
652 				goto out_free_file;
653 
654 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
655 					     elf_interpreter,
656 					     elf_ppnt->p_filesz);
657 			if (retval != elf_ppnt->p_filesz) {
658 				if (retval >= 0)
659 					retval = -EIO;
660 				goto out_free_interp;
661 			}
662 			/* make sure the path is NUL-terminated */
663 			retval = -ENOEXEC;
664 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
665 				goto out_free_interp;
666 
667 			/*
668 			 * The early SET_PERSONALITY here is so that the lookup
669 			 * for the interpreter happens in the namespace of the
670 			 * to-be-execed image.  SET_PERSONALITY can select an
671 			 * alternate root.
672 			 *
673 			 * However, SET_PERSONALITY is NOT allowed to switch
674 			 * this task into the new image's memory mapping
675 			 * policy - that is, TASK_SIZE must still evaluate to
676 			 * that which is appropriate to the execing application.
677 			 * This is because exit_mmap() needs to have TASK_SIZE
678 			 * evaluate to the size of the old image.
679 			 *
680 			 * So if (say) a 64-bit application is execing a 32-bit
681 			 * application it is the architecture's responsibility
682 			 * to defer changing the value of TASK_SIZE until the
683 			 * switch really is going to happen - do this in
684 			 * flush_thread().	- akpm
685 			 */
686 			SET_PERSONALITY(loc->elf_ex, 0);
687 
688 			interpreter = open_exec(elf_interpreter);
689 			retval = PTR_ERR(interpreter);
690 			if (IS_ERR(interpreter))
691 				goto out_free_interp;
692 
693 			/*
694 			 * If the binary is not readable then enforce
695 			 * mm->dumpable = 0 regardless of the interpreter's
696 			 * permissions.
697 			 */
698 			if (file_permission(interpreter, MAY_READ) < 0)
699 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
700 
701 			retval = kernel_read(interpreter, 0, bprm->buf,
702 					     BINPRM_BUF_SIZE);
703 			if (retval != BINPRM_BUF_SIZE) {
704 				if (retval >= 0)
705 					retval = -EIO;
706 				goto out_free_dentry;
707 			}
708 
709 			/* Get the exec headers */
710 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
711 			break;
712 		}
713 		elf_ppnt++;
714 	}
715 
716 	elf_ppnt = elf_phdata;
717 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
718 		if (elf_ppnt->p_type == PT_GNU_STACK) {
719 			if (elf_ppnt->p_flags & PF_X)
720 				executable_stack = EXSTACK_ENABLE_X;
721 			else
722 				executable_stack = EXSTACK_DISABLE_X;
723 			break;
724 		}
725 
726 	/* Some simple consistency checks for the interpreter */
727 	if (elf_interpreter) {
728 		retval = -ELIBBAD;
729 		/* Not an ELF interpreter */
730 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
731 			goto out_free_dentry;
732 		/* Verify the interpreter has a valid arch */
733 		if (!elf_check_arch(&loc->interp_elf_ex))
734 			goto out_free_dentry;
735 	} else {
736 		/* Executables without an interpreter also need a personality  */
737 		SET_PERSONALITY(loc->elf_ex, 0);
738 	}
739 
740 	/* Flush all traces of the currently running executable */
741 	retval = flush_old_exec(bprm);
742 	if (retval)
743 		goto out_free_dentry;
744 
745 	/* OK, This is the point of no return */
746 	current->flags &= ~PF_FORKNOEXEC;
747 	current->mm->def_flags = def_flags;
748 
749 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
750 	   may depend on the personality.  */
751 	SET_PERSONALITY(loc->elf_ex, 0);
752 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
753 		current->personality |= READ_IMPLIES_EXEC;
754 
755 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
756 		current->flags |= PF_RANDOMIZE;
757 	arch_pick_mmap_layout(current->mm);
758 
759 	/* Do this so that we can load the interpreter, if need be.  We will
760 	   change some of these later */
761 	current->mm->free_area_cache = current->mm->mmap_base;
762 	current->mm->cached_hole_size = 0;
763 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
764 				 executable_stack);
765 	if (retval < 0) {
766 		send_sig(SIGKILL, current, 0);
767 		goto out_free_dentry;
768 	}
769 
770 	current->mm->start_stack = bprm->p;
771 
772 	/* Now we do a little grungy work by mmaping the ELF image into
773 	   the correct location in memory. */
774 	for(i = 0, elf_ppnt = elf_phdata;
775 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
776 		int elf_prot = 0, elf_flags;
777 		unsigned long k, vaddr;
778 
779 		if (elf_ppnt->p_type != PT_LOAD)
780 			continue;
781 
782 		if (unlikely (elf_brk > elf_bss)) {
783 			unsigned long nbyte;
784 
785 			/* There was a PT_LOAD segment with p_memsz > p_filesz
786 			   before this one. Map anonymous pages, if needed,
787 			   and clear the area.  */
788 			retval = set_brk (elf_bss + load_bias,
789 					  elf_brk + load_bias);
790 			if (retval) {
791 				send_sig(SIGKILL, current, 0);
792 				goto out_free_dentry;
793 			}
794 			nbyte = ELF_PAGEOFFSET(elf_bss);
795 			if (nbyte) {
796 				nbyte = ELF_MIN_ALIGN - nbyte;
797 				if (nbyte > elf_brk - elf_bss)
798 					nbyte = elf_brk - elf_bss;
799 				if (clear_user((void __user *)elf_bss +
800 							load_bias, nbyte)) {
801 					/*
802 					 * This bss-zeroing can fail if the ELF
803 					 * file specifies odd protections. So
804 					 * we don't check the return value
805 					 */
806 				}
807 			}
808 		}
809 
810 		if (elf_ppnt->p_flags & PF_R)
811 			elf_prot |= PROT_READ;
812 		if (elf_ppnt->p_flags & PF_W)
813 			elf_prot |= PROT_WRITE;
814 		if (elf_ppnt->p_flags & PF_X)
815 			elf_prot |= PROT_EXEC;
816 
817 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
818 
819 		vaddr = elf_ppnt->p_vaddr;
820 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
821 			elf_flags |= MAP_FIXED;
822 		} else if (loc->elf_ex.e_type == ET_DYN) {
823 			/* Try and get dynamic programs out of the way of the
824 			 * default mmap base, as well as whatever program they
825 			 * might try to exec.  This is because the brk will
826 			 * follow the loader, and is not movable.  */
827 #ifdef CONFIG_X86
828 			load_bias = 0;
829 #else
830 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
831 #endif
832 		}
833 
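		/*
		 * load_bias is the delta between the link-time p_vaddr and the
		 * actual mapping address; it stays 0 for ET_EXEC and is fixed
		 * up below once the first ET_DYN segment has been mapped.
		 */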
834 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
835 				elf_prot, elf_flags, 0);
836 		if (BAD_ADDR(error)) {
837 			send_sig(SIGKILL, current, 0);
838 			retval = IS_ERR((void *)error) ?
839 				PTR_ERR((void*)error) : -EINVAL;
840 			goto out_free_dentry;
841 		}
842 
843 		if (!load_addr_set) {
844 			load_addr_set = 1;
845 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
846 			if (loc->elf_ex.e_type == ET_DYN) {
847 				load_bias += error -
848 				             ELF_PAGESTART(load_bias + vaddr);
849 				load_addr += load_bias;
850 				reloc_func_desc = load_bias;
851 			}
852 		}
853 		k = elf_ppnt->p_vaddr;
854 		if (k < start_code)
855 			start_code = k;
856 		if (start_data < k)
857 			start_data = k;
858 
859 		/*
860 		 * Check to see if the section's size will overflow the
861 		 * allowed task size. Note that p_filesz must always be
862 		 * <= p_memsz so it is only necessary to check p_memsz.
863 		 */
864 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
865 		    elf_ppnt->p_memsz > TASK_SIZE ||
866 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
867 			/* set_brk can never work. Avoid overflows. */
868 			send_sig(SIGKILL, current, 0);
869 			retval = -EINVAL;
870 			goto out_free_dentry;
871 		}
872 
873 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
874 
875 		if (k > elf_bss)
876 			elf_bss = k;
877 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
878 			end_code = k;
879 		if (end_data < k)
880 			end_data = k;
881 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
882 		if (k > elf_brk)
883 			elf_brk = k;
884 	}
885 
886 	loc->elf_ex.e_entry += load_bias;
887 	elf_bss += load_bias;
888 	elf_brk += load_bias;
889 	start_code += load_bias;
890 	end_code += load_bias;
891 	start_data += load_bias;
892 	end_data += load_bias;
893 
894 	/* Calling set_brk effectively mmaps the pages that we need
895 	 * for the bss and break sections.  We must do this before
896 	 * mapping in the interpreter, to make sure it doesn't wind
897 	 * up getting placed where the bss needs to go.
898 	 */
899 	retval = set_brk(elf_bss, elf_brk);
900 	if (retval) {
901 		send_sig(SIGKILL, current, 0);
902 		goto out_free_dentry;
903 	}
904 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
905 		send_sig(SIGSEGV, current, 0);
906 		retval = -EFAULT; /* Nobody gets to see this, but.. */
907 		goto out_free_dentry;
908 	}
909 
910 	if (elf_interpreter) {
911 		unsigned long uninitialized_var(interp_map_addr);
912 
913 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
914 					    interpreter,
915 					    &interp_map_addr,
916 					    load_bias);
917 		if (!IS_ERR((void *)elf_entry)) {
918 			/*
919 			 * load_elf_interp() returns relocation
920 			 * adjustment
921 			 */
922 			interp_load_addr = elf_entry;
923 			elf_entry += loc->interp_elf_ex.e_entry;
924 		}
925 		if (BAD_ADDR(elf_entry)) {
926 			force_sig(SIGSEGV, current);
927 			retval = IS_ERR((void *)elf_entry) ?
928 					(int)elf_entry : -EINVAL;
929 			goto out_free_dentry;
930 		}
931 		reloc_func_desc = interp_load_addr;
932 
933 		allow_write_access(interpreter);
934 		fput(interpreter);
935 		kfree(elf_interpreter);
936 	} else {
937 		elf_entry = loc->elf_ex.e_entry;
938 		if (BAD_ADDR(elf_entry)) {
939 			force_sig(SIGSEGV, current);
940 			retval = -EINVAL;
941 			goto out_free_dentry;
942 		}
943 	}
944 
945 	kfree(elf_phdata);
946 
947 	sys_close(elf_exec_fileno);
948 
949 	set_binfmt(&elf_format);
950 
951 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
952 	retval = arch_setup_additional_pages(bprm, executable_stack);
953 	if (retval < 0) {
954 		send_sig(SIGKILL, current, 0);
955 		goto out;
956 	}
957 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
958 
959 	compute_creds(bprm);
960 	current->flags &= ~PF_FORKNOEXEC;
961 	retval = create_elf_tables(bprm, &loc->elf_ex,
962 			  load_addr, interp_load_addr);
963 	if (retval < 0) {
964 		send_sig(SIGKILL, current, 0);
965 		goto out;
966 	}
967 	/* N.B. passed_fileno might not be initialized? */
968 	current->mm->end_code = end_code;
969 	current->mm->start_code = start_code;
970 	current->mm->start_data = start_data;
971 	current->mm->end_data = end_data;
972 	current->mm->start_stack = bprm->p;
973 
974 #ifdef arch_randomize_brk
975 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
976 		current->mm->brk = current->mm->start_brk =
977 			arch_randomize_brk(current->mm);
978 #endif
979 
980 	if (current->personality & MMAP_PAGE_ZERO) {
981 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
982 		   and some applications "depend" upon this behavior.
983 		   Since we do not have the power to recompile these, we
984 		   emulate the SVr4 behavior. Sigh. */
985 		down_write(&current->mm->mmap_sem);
986 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
987 				MAP_FIXED | MAP_PRIVATE, 0);
988 		up_write(&current->mm->mmap_sem);
989 	}
990 
991 #ifdef ELF_PLAT_INIT
992 	/*
993 	 * The ABI may specify that certain registers be set up in special
994 	 * ways (on i386 %edx is the address of a DT_FINI function, for
995 	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
996 	 * that the e_entry field is the address of the function descriptor
997 	 * for the startup routine, rather than the address of the startup
998 	 * routine itself.  This macro performs whatever initialization to
999 	 * the regs structure is required as well as any relocations to the
1000 	 * function descriptor entries when executing dynamically linked apps.
1001 	 */
1002 	ELF_PLAT_INIT(regs, reloc_func_desc);
1003 #endif
1004 
1005 	start_thread(regs, elf_entry, bprm->p);
1006 	retval = 0;
1007 out:
1008 	kfree(loc);
1009 out_ret:
1010 	return retval;
1011 
1012 	/* error cleanup */
1013 out_free_dentry:
1014 	allow_write_access(interpreter);
1015 	if (interpreter)
1016 		fput(interpreter);
1017 out_free_interp:
1018 	kfree(elf_interpreter);
1019 out_free_file:
1020 	sys_close(elf_exec_fileno);
1021 out_free_ph:
1022 	kfree(elf_phdata);
1023 	goto out;
1024 }
1025 
1026 /* This is really simpleminded and specialized - we are loading an
1027    a.out library that is given an ELF header. */
1028 static int load_elf_library(struct file *file)
1029 {
1030 	struct elf_phdr *elf_phdata;
1031 	struct elf_phdr *eppnt;
1032 	unsigned long elf_bss, bss, len;
1033 	int retval, error, i, j;
1034 	struct elfhdr elf_ex;
1035 
1036 	error = -ENOEXEC;
1037 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1038 	if (retval != sizeof(elf_ex))
1039 		goto out;
1040 
1041 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1042 		goto out;
1043 
1044 	/* First of all, some simple consistency checks */
1045 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1046 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1047 		goto out;
1048 
1049 	/* Now read in all of the header information */
1050 
1051 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1052 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1053 
1054 	error = -ENOMEM;
1055 	elf_phdata = kmalloc(j, GFP_KERNEL);
1056 	if (!elf_phdata)
1057 		goto out;
1058 
1059 	eppnt = elf_phdata;
1060 	error = -ENOEXEC;
1061 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1062 	if (retval != j)
1063 		goto out_free_ph;
1064 
1065 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1066 		if ((eppnt + i)->p_type == PT_LOAD)
1067 			j++;
1068 	if (j != 1)
1069 		goto out_free_ph;
1070 
1071 	while (eppnt->p_type != PT_LOAD)
1072 		eppnt++;
1073 
1074 	/* Now use mmap to map the library into memory. */
1075 	down_write(&current->mm->mmap_sem);
1076 	error = do_mmap(file,
1077 			ELF_PAGESTART(eppnt->p_vaddr),
1078 			(eppnt->p_filesz +
1079 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1080 			PROT_READ | PROT_WRITE | PROT_EXEC,
1081 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1082 			(eppnt->p_offset -
1083 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1084 	up_write(&current->mm->mmap_sem);
1085 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1086 		goto out_free_ph;
1087 
1088 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1089 	if (padzero(elf_bss)) {
1090 		error = -EFAULT;
1091 		goto out_free_ph;
1092 	}
1093 
1094 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1095 			    ELF_MIN_ALIGN - 1);
1096 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1097 	if (bss > len) {
1098 		down_write(&current->mm->mmap_sem);
1099 		do_brk(len, bss - len);
1100 		up_write(&current->mm->mmap_sem);
1101 	}
1102 	error = 0;
1103 
1104 out_free_ph:
1105 	kfree(elf_phdata);
1106 out:
1107 	return error;
1108 }
1109 
1110 /*
1111  * Note that some platforms still use traditional core dumps and not
1112  * the ELF core dump.  Each platform can select it as appropriate.
1113  */
1114 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1115 
1116 /*
1117  * ELF core dumper
1118  *
1119  * Modelled on fs/exec.c:aout_core_dump()
1120  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1121  */
1122 /*
1123  * These are the only things you should do on a core-file: use only these
1124  * functions to write out all the necessary info.
1125  */
1126 static int dump_write(struct file *file, const void *addr, int nr)
1127 {
1128 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1129 }
1130 
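/*
 * Advance the dump file position by off bytes: seek when the target supports
 * it, otherwise pad with zero-filled writes (needed when dumping to a pipe).
 */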
1131 static int dump_seek(struct file *file, loff_t off)
1132 {
1133 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1134 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1135 			return 0;
1136 	} else {
1137 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1138 		if (!buf)
1139 			return 0;
1140 		while (off > 0) {
1141 			unsigned long n = off;
1142 			if (n > PAGE_SIZE)
1143 				n = PAGE_SIZE;
1144 			if (!dump_write(file, buf, n)) {
				free_page((unsigned long)buf);
				return 0;
			}
1146 			off -= n;
1147 		}
1148 		free_page((unsigned long)buf);
1149 	}
1150 	return 1;
1151 }
1152 
1153 /*
1154  * Decide what to dump of a segment, part, all or none.
1155  */
1156 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157 				   unsigned long mm_flags)
1158 {
1159 	/* The vma can be set up to tell us the answer directly.  */
1160 	if (vma->vm_flags & VM_ALWAYSDUMP)
1161 		goto whole;
1162 
1163 	/* Do not dump I/O mapped devices or special mappings */
1164 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1165 		return 0;
1166 
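	/*
	 * FILTER(type) tests the MMF_DUMP_<type> bit in mm_flags, i.e. the
	 * per-process coredump filter (settable via /proc/<pid>/coredump_filter).
	 */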
1167 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1168 
1169 	/* By default, dump shared memory if mapped from an anonymous file. */
1170 	if (vma->vm_flags & VM_SHARED) {
1171 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1172 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1173 			goto whole;
1174 		return 0;
1175 	}
1176 
1177 	/* Dump segments that have been written to.  */
1178 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1179 		goto whole;
1180 	if (vma->vm_file == NULL)
1181 		return 0;
1182 
1183 	if (FILTER(MAPPED_PRIVATE))
1184 		goto whole;
1185 
1186 	/*
1187 	 * If this looks like the beginning of a DSO or executable mapping,
1188 	 * check for an ELF header.  If we find one, dump the first page to
1189 	 * aid in determining what was mapped here.
1190 	 */
1191 	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1192 		u32 __user *header = (u32 __user *) vma->vm_start;
1193 		u32 word;
1194 		/*
1195 		 * Doing it this way gets the constant folded by GCC.
1196 		 */
1197 		union {
1198 			u32 cmp;
1199 			char elfmag[SELFMAG];
1200 		} magic;
1201 		BUILD_BUG_ON(SELFMAG != sizeof word);
1202 		magic.elfmag[EI_MAG0] = ELFMAG0;
1203 		magic.elfmag[EI_MAG1] = ELFMAG1;
1204 		magic.elfmag[EI_MAG2] = ELFMAG2;
1205 		magic.elfmag[EI_MAG3] = ELFMAG3;
1206 		if (get_user(word, header) == 0 && word == magic.cmp)
1207 			return PAGE_SIZE;
1208 	}
1209 
1210 #undef	FILTER
1211 
1212 	return 0;
1213 
1214 whole:
1215 	return vma->vm_end - vma->vm_start;
1216 }
1217 
1218 /* An ELF note in memory */
1219 struct memelfnote
1220 {
1221 	const char *name;
1222 	int type;
1223 	unsigned int datasz;
1224 	void *data;
1225 };
1226 
1227 static int notesize(struct memelfnote *en)
1228 {
1229 	int sz;
1230 
1231 	sz = sizeof(struct elf_note);
1232 	sz += roundup(strlen(en->name) + 1, 4);
1233 	sz += roundup(en->datasz, 4);
1234 
1235 	return sz;
1236 }
1237 
1238 #define DUMP_WRITE(addr, nr, foffset)	\
1239 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1240 
1241 static int alignfile(struct file *file, loff_t *foffset)
1242 {
1243 	static const char buf[4] = { 0, };
1244 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1245 	return 1;
1246 }
1247 
1248 static int writenote(struct memelfnote *men, struct file *file,
1249 			loff_t *foffset)
1250 {
1251 	struct elf_note en;
1252 	en.n_namesz = strlen(men->name) + 1;
1253 	en.n_descsz = men->datasz;
1254 	en.n_type = men->type;
1255 
1256 	DUMP_WRITE(&en, sizeof(en), foffset);
1257 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1258 	if (!alignfile(file, foffset))
1259 		return 0;
1260 	DUMP_WRITE(men->data, men->datasz, foffset);
1261 	if (!alignfile(file, foffset))
1262 		return 0;
1263 
1264 	return 1;
1265 }
1266 #undef DUMP_WRITE
1267 
1268 #define DUMP_WRITE(addr, nr)	\
1269 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1270 		goto end_coredump;
1271 #define DUMP_SEEK(off)	\
1272 	if (!dump_seek(file, (off))) \
1273 		goto end_coredump;
1274 
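/*
 * Fill in a minimal ET_CORE file header; the program headers follow the
 * header directly, so e_phoff is simply sizeof(struct elfhdr).
 */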
1275 static void fill_elf_header(struct elfhdr *elf, int segs,
1276 			    u16 machine, u32 flags, u8 osabi)
1277 {
1278 	memset(elf, 0, sizeof(*elf));
1279 
1280 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1281 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1282 	elf->e_ident[EI_DATA] = ELF_DATA;
1283 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1284 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1285 
1286 	elf->e_type = ET_CORE;
1287 	elf->e_machine = machine;
1288 	elf->e_version = EV_CURRENT;
1289 	elf->e_phoff = sizeof(struct elfhdr);
1290 	elf->e_flags = flags;
1291 	elf->e_ehsize = sizeof(struct elfhdr);
1292 	elf->e_phentsize = sizeof(struct elf_phdr);
1293 	elf->e_phnum = segs;
1294 
1295 	return;
1296 }
1297 
1298 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1299 {
1300 	phdr->p_type = PT_NOTE;
1301 	phdr->p_offset = offset;
1302 	phdr->p_vaddr = 0;
1303 	phdr->p_paddr = 0;
1304 	phdr->p_filesz = sz;
1305 	phdr->p_memsz = 0;
1306 	phdr->p_flags = 0;
1307 	phdr->p_align = 0;
1308 	return;
1309 }
1310 
1311 static void fill_note(struct memelfnote *note, const char *name, int type,
1312 		unsigned int sz, void *data)
1313 {
1314 	note->name = name;
1315 	note->type = type;
1316 	note->datasz = sz;
1317 	note->data = data;
1318 	return;
1319 }
1320 
1321 /*
1322  * fill up all the fields in prstatus from the given task struct, except
1323  * registers which need to be filled up separately.
1324  */
1325 static void fill_prstatus(struct elf_prstatus *prstatus,
1326 		struct task_struct *p, long signr)
1327 {
1328 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1329 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1330 	prstatus->pr_sighold = p->blocked.sig[0];
1331 	prstatus->pr_pid = task_pid_vnr(p);
1332 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1333 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1334 	prstatus->pr_sid = task_session_vnr(p);
1335 	if (thread_group_leader(p)) {
1336 		/*
1337 		 * This is the record for the group leader.  Add in the
1338 		 * cumulative times of previous dead threads.  This total
1339 		 * won't include the time of each live thread whose state
1340 		 * is included in the core dump.  The final total reported
1341 		 * to our parent process when it calls wait4 will include
1342 		 * those sums as well as the little bit more time it takes
1343 		 * this and each other thread to finish dying after the
1344 		 * core dump synchronization phase.
1345 		 */
1346 		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1347 				   &prstatus->pr_utime);
1348 		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1349 				   &prstatus->pr_stime);
1350 	} else {
1351 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1352 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1353 	}
1354 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1355 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1356 }
1357 
1358 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1359 		       struct mm_struct *mm)
1360 {
1361 	unsigned int i, len;
1362 
1363 	/* first copy the parameters from user space */
1364 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1365 
1366 	len = mm->arg_end - mm->arg_start;
1367 	if (len >= ELF_PRARGSZ)
1368 		len = ELF_PRARGSZ-1;
1369 	if (copy_from_user(&psinfo->pr_psargs,
1370 		           (const char __user *)mm->arg_start, len))
1371 		return -EFAULT;
1372 	for(i = 0; i < len; i++)
1373 		if (psinfo->pr_psargs[i] == 0)
1374 			psinfo->pr_psargs[i] = ' ';
1375 	psinfo->pr_psargs[len] = 0;
1376 
1377 	psinfo->pr_pid = task_pid_vnr(p);
1378 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1379 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1380 	psinfo->pr_sid = task_session_vnr(p);
1381 
1382 	i = p->state ? ffz(~p->state) + 1 : 0;
1383 	psinfo->pr_state = i;
1384 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1385 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1386 	psinfo->pr_nice = task_nice(p);
1387 	psinfo->pr_flag = p->flags;
1388 	SET_UID(psinfo->pr_uid, p->uid);
1389 	SET_GID(psinfo->pr_gid, p->gid);
1390 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1391 
1392 	return 0;
1393 }
1394 
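/*
 * The saved auxiliary vector is walked until its AT_NULL terminator; the
 * terminating entry is included in the NT_AUXV note.
 */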
1395 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1396 {
1397 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1398 	int i = 0;
1399 	do
1400 		i += 2;
1401 	while (auxv[i - 2] != AT_NULL);
1402 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1403 }
1404 
1405 #ifdef CORE_DUMP_USE_REGSET
1406 #include <linux/regset.h>
1407 
1408 struct elf_thread_core_info {
1409 	struct elf_thread_core_info *next;
1410 	struct task_struct *task;
1411 	struct elf_prstatus prstatus;
1412 	struct memelfnote notes[0];
1413 };
1414 
1415 struct elf_note_info {
1416 	struct elf_thread_core_info *thread;
1417 	struct memelfnote psinfo;
1418 	struct memelfnote auxv;
1419 	size_t size;
1420 	int thread_notes;
1421 };
1422 
1423 /*
1424  * When a regset has a writeback hook, we call it on each thread before
1425  * dumping user memory.  On register window machines, this makes sure the
1426  * user memory backing the register data is up to date before we read it.
1427  */
1428 static void do_thread_regset_writeback(struct task_struct *task,
1429 				       const struct user_regset *regset)
1430 {
1431 	if (regset->writeback)
1432 		regset->writeback(task, regset, 1);
1433 }
1434 
1435 static int fill_thread_core_info(struct elf_thread_core_info *t,
1436 				 const struct user_regset_view *view,
1437 				 long signr, size_t *total)
1438 {
1439 	unsigned int i;
1440 
1441 	/*
1442 	 * NT_PRSTATUS is the one special case, because the regset data
1443 	 * goes into the pr_reg field inside the note contents, rather
1444 	 * than being the whole note contents.  We fill the rest in here.
1445 	 * We assume that regset 0 is NT_PRSTATUS.
1446 	 */
1447 	fill_prstatus(&t->prstatus, t->task, signr);
1448 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1449 				    0, sizeof(t->prstatus.pr_reg),
1450 				    &t->prstatus.pr_reg, NULL);
1451 
1452 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1453 		  sizeof(t->prstatus), &t->prstatus);
1454 	*total += notesize(&t->notes[0]);
1455 
1456 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1457 
1458 	/*
1459 	 * Each other regset might generate a note too.  For each regset
1460 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1461 	 * all zero and we'll know to skip writing it later.
1462 	 */
1463 	for (i = 1; i < view->n; ++i) {
1464 		const struct user_regset *regset = &view->regsets[i];
1465 		do_thread_regset_writeback(t->task, regset);
1466 		if (regset->core_note_type &&
1467 		    (!regset->active || regset->active(t->task, regset))) {
1468 			int ret;
1469 			size_t size = regset->n * regset->size;
1470 			void *data = kmalloc(size, GFP_KERNEL);
1471 			if (unlikely(!data))
1472 				return 0;
1473 			ret = regset->get(t->task, regset,
1474 					  0, size, data, NULL);
1475 			if (unlikely(ret))
1476 				kfree(data);
1477 			else {
1478 				if (regset->core_note_type != NT_PRFPREG)
1479 					fill_note(&t->notes[i], "LINUX",
1480 						  regset->core_note_type,
1481 						  size, data);
1482 				else {
1483 					t->prstatus.pr_fpvalid = 1;
1484 					fill_note(&t->notes[i], "CORE",
1485 						  NT_PRFPREG, size, data);
1486 				}
1487 				*total += notesize(&t->notes[i]);
1488 			}
1489 		}
1490 	}
1491 
1492 	return 1;
1493 }
1494 
1495 static int fill_note_info(struct elfhdr *elf, int phdrs,
1496 			  struct elf_note_info *info,
1497 			  long signr, struct pt_regs *regs)
1498 {
1499 	struct task_struct *dump_task = current;
1500 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1501 	struct elf_thread_core_info *t;
1502 	struct elf_prpsinfo *psinfo;
1503 	struct core_thread *ct;
1504 	unsigned int i;
1505 
1506 	info->size = 0;
1507 	info->thread = NULL;
1508 
1509 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1510 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1511 
1512 	if (psinfo == NULL)
1513 		return 0;
1514 
1515 	/*
1516 	 * Figure out how many notes we're going to need for each thread.
1517 	 */
1518 	info->thread_notes = 0;
1519 	for (i = 0; i < view->n; ++i)
1520 		if (view->regsets[i].core_note_type != 0)
1521 			++info->thread_notes;
1522 
1523 	/*
1524 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1525 	 * since it is our one special case.
1526 	 */
1527 	if (unlikely(info->thread_notes == 0) ||
1528 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1529 		WARN_ON(1);
1530 		return 0;
1531 	}
1532 
1533 	/*
1534 	 * Initialize the ELF file header.
1535 	 */
1536 	fill_elf_header(elf, phdrs,
1537 			view->e_machine, view->e_flags, view->ei_osabi);
1538 
1539 	/*
1540 	 * Allocate a structure for each thread.
1541 	 */
1542 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1543 		t = kzalloc(offsetof(struct elf_thread_core_info,
1544 				     notes[info->thread_notes]),
1545 			    GFP_KERNEL);
1546 		if (unlikely(!t))
1547 			return 0;
1548 
1549 		t->task = ct->task;
1550 		if (ct->task == dump_task || !info->thread) {
1551 			t->next = info->thread;
1552 			info->thread = t;
1553 		} else {
1554 			/*
1555 			 * Make sure to keep the original task at
1556 			 * the head of the list.
1557 			 */
1558 			t->next = info->thread->next;
1559 			info->thread->next = t;
1560 		}
1561 	}
1562 
1563 	/*
1564 	 * Now fill in each thread's information.
1565 	 */
1566 	for (t = info->thread; t != NULL; t = t->next)
1567 		if (!fill_thread_core_info(t, view, signr, &info->size))
1568 			return 0;
1569 
1570 	/*
1571 	 * Fill in the two process-wide notes.
1572 	 */
1573 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1574 	info->size += notesize(&info->psinfo);
1575 
1576 	fill_auxv_note(&info->auxv, current->mm);
1577 	info->size += notesize(&info->auxv);
1578 
1579 	return 1;
1580 }
1581 
1582 static size_t get_note_info_size(struct elf_note_info *info)
1583 {
1584 	return info->size;
1585 }
1586 
1587 /*
1588  * Write all the notes for each thread.  When writing the first thread, the
1589  * process-wide notes are interleaved after the first thread-specific note.
1590  */
1591 static int write_note_info(struct elf_note_info *info,
1592 			   struct file *file, loff_t *foffset)
1593 {
1594 	bool first = 1;
1595 	struct elf_thread_core_info *t = info->thread;
1596 
1597 	do {
1598 		int i;
1599 
1600 		if (!writenote(&t->notes[0], file, foffset))
1601 			return 0;
1602 
1603 		if (first && !writenote(&info->psinfo, file, foffset))
1604 			return 0;
1605 		if (first && !writenote(&info->auxv, file, foffset))
1606 			return 0;
1607 
1608 		for (i = 1; i < info->thread_notes; ++i)
1609 			if (t->notes[i].data &&
1610 			    !writenote(&t->notes[i], file, foffset))
1611 				return 0;
1612 
1613 		first = 0;
1614 		t = t->next;
1615 	} while (t);
1616 
1617 	return 1;
1618 }
1619 
1620 static void free_note_info(struct elf_note_info *info)
1621 {
1622 	struct elf_thread_core_info *threads = info->thread;
1623 	while (threads) {
1624 		unsigned int i;
1625 		struct elf_thread_core_info *t = threads;
1626 		threads = t->next;
1627 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1628 		for (i = 1; i < info->thread_notes; ++i)
1629 			kfree(t->notes[i].data);
1630 		kfree(t);
1631 	}
1632 	kfree(info->psinfo.data);
1633 }
1634 
1635 #else
1636 
1637 /* Here is the structure in which status of each thread is captured. */
1638 struct elf_thread_status
1639 {
1640 	struct list_head list;
1641 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1642 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1643 	struct task_struct *thread;
1644 #ifdef ELF_CORE_COPY_XFPREGS
1645 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1646 #endif
1647 	struct memelfnote notes[3];
1648 	int num_notes;
1649 };
1650 
1651 /*
1652  * In order to add the specific thread information for the ELF file format,
1653  * we need to keep a linked list of every thread's pr_status and then create
1654  * a single section for them in the final core file.
1655  */
1656 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1657 {
1658 	int sz = 0;
1659 	struct task_struct *p = t->thread;
1660 	t->num_notes = 0;
1661 
1662 	fill_prstatus(&t->prstatus, p, signr);
1663 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1664 
1665 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1666 		  &(t->prstatus));
1667 	t->num_notes++;
1668 	sz += notesize(&t->notes[0]);
1669 
1670 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1671 								&t->fpu))) {
1672 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1673 			  &(t->fpu));
1674 		t->num_notes++;
1675 		sz += notesize(&t->notes[1]);
1676 	}
1677 
1678 #ifdef ELF_CORE_COPY_XFPREGS
1679 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1680 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1681 			  sizeof(t->xfpu), &t->xfpu);
1682 		t->num_notes++;
1683 		sz += notesize(&t->notes[2]);
1684 	}
1685 #endif
1686 	return sz;
1687 }
1688 
1689 struct elf_note_info {
1690 	struct memelfnote *notes;
1691 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1692 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1693 	struct list_head thread_list;
1694 	elf_fpregset_t *fpu;
1695 #ifdef ELF_CORE_COPY_XFPREGS
1696 	elf_fpxregset_t *xfpu;
1697 #endif
1698 	int thread_status_size;
1699 	int numnote;
1700 };
1701 
1702 static int fill_note_info(struct elfhdr *elf, int phdrs,
1703 			  struct elf_note_info *info,
1704 			  long signr, struct pt_regs *regs)
1705 {
1706 #define	NUM_NOTES	6
1707 	struct list_head *t;
1708 
1709 	info->notes = NULL;
1710 	info->prstatus = NULL;
1711 	info->psinfo = NULL;
1712 	info->fpu = NULL;
1713 #ifdef ELF_CORE_COPY_XFPREGS
1714 	info->xfpu = NULL;
1715 #endif
1716 	INIT_LIST_HEAD(&info->thread_list);
1717 
1718 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1719 			      GFP_KERNEL);
1720 	if (!info->notes)
1721 		return 0;
1722 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1723 	if (!info->psinfo)
1724 		return 0;
1725 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1726 	if (!info->prstatus)
1727 		return 0;
1728 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1729 	if (!info->fpu)
1730 		return 0;
1731 #ifdef ELF_CORE_COPY_XFPREGS
1732 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1733 	if (!info->xfpu)
1734 		return 0;
1735 #endif
1736 
1737 	info->thread_status_size = 0;
1738 	if (signr) {
1739 		struct core_thread *ct;
1740 		struct elf_thread_status *ets;
1741 
1742 		for (ct = current->mm->core_state->dumper.next;
1743 						ct; ct = ct->next) {
1744 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1745 			if (!ets)
1746 				return 0;
1747 
1748 			ets->thread = ct->task;
1749 			list_add(&ets->list, &info->thread_list);
1750 		}
1751 
1752 		list_for_each(t, &info->thread_list) {
1753 			int sz;
1754 
1755 			ets = list_entry(t, struct elf_thread_status, list);
1756 			sz = elf_dump_thread_status(signr, ets);
1757 			info->thread_status_size += sz;
1758 		}
1759 	}
1760 	/* now collect the dump for the current task */
1761 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1762 	fill_prstatus(info->prstatus, current, signr);
1763 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1764 
1765 	/* Set up header */
1766 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1767 
1768 	/*
1769 	 * Set up the notes in similar form to SVR4 core dumps made
1770 	 * with info from their /proc.
1771 	 */
1772 
1773 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1774 		  sizeof(*info->prstatus), info->prstatus);
1775 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1776 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1777 		  sizeof(*info->psinfo), info->psinfo);
1778 
1779 	info->numnote = 2;
1780 
1781 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1782 
1783 	/* Try to dump the FPU. */
1784 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1785 							       info->fpu);
1786 	if (info->prstatus->pr_fpvalid)
1787 		fill_note(info->notes + info->numnote++,
1788 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1789 #ifdef ELF_CORE_COPY_XFPREGS
1790 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1791 		fill_note(info->notes + info->numnote++,
1792 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1793 			  sizeof(*info->xfpu), info->xfpu);
1794 #endif
1795 
1796 	return 1;
1797 
1798 #undef NUM_NOTES
1799 }
1800 
1801 static size_t get_note_info_size(struct elf_note_info *info)
1802 {
1803 	int sz = 0;
1804 	int i;
1805 
1806 	for (i = 0; i < info->numnote; i++)
1807 		sz += notesize(info->notes + i);
1808 
1809 	sz += info->thread_status_size;
1810 
1811 	return sz;
1812 }
1813 
1814 static int write_note_info(struct elf_note_info *info,
1815 			   struct file *file, loff_t *foffset)
1816 {
1817 	int i;
1818 	struct list_head *t;
1819 
1820 	for (i = 0; i < info->numnote; i++)
1821 		if (!writenote(info->notes + i, file, foffset))
1822 			return 0;
1823 
1824 	/* write out the thread status notes section */
1825 	list_for_each(t, &info->thread_list) {
1826 		struct elf_thread_status *tmp =
1827 				list_entry(t, struct elf_thread_status, list);
1828 
1829 		for (i = 0; i < tmp->num_notes; i++)
1830 			if (!writenote(&tmp->notes[i], file, foffset))
1831 				return 0;
1832 	}
1833 
1834 	return 1;
1835 }
1836 
1837 static void free_note_info(struct elf_note_info *info)
1838 {
1839 	while (!list_empty(&info->thread_list)) {
1840 		struct list_head *tmp = info->thread_list.next;
1841 		list_del(tmp);
1842 		kfree(list_entry(tmp, struct elf_thread_status, list));
1843 	}
1844 
1845 	kfree(info->prstatus);
1846 	kfree(info->psinfo);
1847 	kfree(info->notes);
1848 	kfree(info->fpu);
1849 #ifdef ELF_CORE_COPY_XFPREGS
1850 	kfree(info->xfpu);
1851 #endif
1852 }
1853 
1854 #endif
1855 
1856 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1857 					struct vm_area_struct *gate_vma)
1858 {
1859 	struct vm_area_struct *ret = tsk->mm->mmap;
1860 
1861 	if (ret)
1862 		return ret;
1863 	return gate_vma;
1864 }
1865 /*
1866  * Helper function for iterating across a vma list.  It ensures that the caller
1867  * will visit `gate_vma' prior to terminating the search.
1868  */
1869 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1870 					struct vm_area_struct *gate_vma)
1871 {
1872 	struct vm_area_struct *ret;
1873 
1874 	ret = this_vma->vm_next;
1875 	if (ret)
1876 		return ret;
1877 	if (this_vma == gate_vma)
1878 		return NULL;
1879 	return gate_vma;
1880 }
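/*
 * For reference, the dump loops below traverse the address space with the
 * two helpers above, e.g.:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *			vma = next_vma(vma, gate_vma))
 *		...
 *
 * which visits every vma on mm->mmap and then the gate vma exactly once.
 */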
1881 
1882 /*
1883  * Actual dumper
1884  *
1885  * This is a two-pass process; first we find the offsets of the bits,
1886  * and then they are actually written out.  If we exceed the core file
1887  * size limit, we just truncate.
1888  */
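/*
 * The file produced below is laid out roughly as:
 *
 *	ELF header
 *	program headers (a PT_NOTE entry for the notes, a PT_LOAD entry
 *			 per vma, plus any architecture extras)
 *	note data (prstatus, psinfo, auxv, FPU state, per-thread notes)
 *	page-aligned memory segment data, one chunk per vma
 */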
1889 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1890 {
1891 	int has_dumped = 0;
1892 	mm_segment_t fs;
1893 	int segs;
1894 	size_t size = 0;
1895 	struct vm_area_struct *vma, *gate_vma;
1896 	struct elfhdr *elf = NULL;
1897 	loff_t offset = 0, dataoff, foffset;
1898 	unsigned long mm_flags;
1899 	struct elf_note_info info;
1900 
1901 	/*
1902 	 * We no longer stop all VM operations.
1903 	 *
1904 	 * This is because those processes that could possibly change map_count
1905 	 * or the mmap / vma pages are now blocked in do_exit on current
1906 	 * finishing this core dump.
1907 	 *
1908 	 * Only ptrace can touch these memory addresses, but it doesn't change
1909 	 * the map_count or the pages allocated, so there is no possibility of
1910 	 * crashing while dumping the mm->vm_next areas to the core file.
1911 	 */
1912 
1913 	/* alloc memory for large data structures: too large to be on stack */
1914 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1915 	if (!elf)
1916 		goto out;
1917 
1918 	segs = current->mm->map_count;
1919 #ifdef ELF_CORE_EXTRA_PHDRS
1920 	segs += ELF_CORE_EXTRA_PHDRS;
1921 #endif
1922 
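	/*
	 * The gate vma (e.g. the vsyscall page on some architectures) is not
	 * on the mm->mmap list, so it needs its own program header slot.
	 */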
1923 	gate_vma = get_gate_vma(current);
1924 	if (gate_vma != NULL)
1925 		segs++;
1926 
1927 	/*
1928 	 * Collect all the non-memory information about the process for the
1929 	 * notes.  This also sets up the file header.
1930 	 */
1931 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1932 			    &info, signr, regs))
1933 		goto cleanup;
1934 
1935 	has_dumped = 1;
1936 	current->flags |= PF_DUMPCORE;
1937 
1938 	fs = get_fs();
1939 	set_fs(KERNEL_DS);
1940 
1941 	DUMP_WRITE(elf, sizeof(*elf));
1942 	offset += sizeof(*elf);				/* Elf header */
1943 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
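	/*
	 * Roughly: 'offset' is the position computed from the layout above,
	 * while 'foffset' tracks how far the note writes actually advance
	 * the file, so that DUMP_SEEK(dataoff - foffset) below can pad out
	 * to the page-aligned data area.
	 */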
1944 	foffset = offset;
1945 
1946 	/* Write notes phdr entry */
1947 	{
1948 		struct elf_phdr phdr;
1949 		size_t sz = get_note_info_size(&info);
1950 
1951 		sz += elf_coredump_extra_notes_size();
1952 
1953 		fill_elf_note_phdr(&phdr, sz, offset);
1954 		offset += sz;
1955 		DUMP_WRITE(&phdr, sizeof(phdr));
1956 	}
1957 
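	/*
	 * Memory segment data starts at the next ELF_EXEC_PAGESIZE boundary,
	 * matching the p_align of the PT_LOAD headers written below.
	 */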
1958 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1959 
1960 	/*
1961 	 * We must use the same mm->flags while dumping core to avoid
1962 	 * inconsistency between the program headers and bodies, otherwise an
1963 	 * unusable core file can be generated.
1964 	 */
1965 	mm_flags = current->mm->flags;
1966 
1967 	/* Write a program header for each memory segment to be dumped */
1968 	for (vma = first_vma(current, gate_vma); vma != NULL;
1969 			vma = next_vma(vma, gate_vma)) {
1970 		struct elf_phdr phdr;
1971 
1972 		phdr.p_type = PT_LOAD;
1973 		phdr.p_offset = offset;
1974 		phdr.p_vaddr = vma->vm_start;
1975 		phdr.p_paddr = 0;
1976 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1977 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1978 		offset += phdr.p_filesz;
1979 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1980 		if (vma->vm_flags & VM_WRITE)
1981 			phdr.p_flags |= PF_W;
1982 		if (vma->vm_flags & VM_EXEC)
1983 			phdr.p_flags |= PF_X;
1984 		phdr.p_align = ELF_EXEC_PAGESIZE;
1985 
1986 		DUMP_WRITE(&phdr, sizeof(phdr));
1987 	}
1988 
1989 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1990 	ELF_CORE_WRITE_EXTRA_PHDRS;
1991 #endif
1992 
1993 	/* write out the notes section */
1994 	if (!write_note_info(&info, file, &foffset))
1995 		goto end_coredump;
1996 
1997 	if (elf_coredump_extra_notes_write(file, &foffset))
1998 		goto end_coredump;
1999 
2000 	/* Align to page */
2001 	DUMP_SEEK(dataoff - foffset);
2002 
2003 	for (vma = first_vma(current, gate_vma); vma != NULL;
2004 			vma = next_vma(vma, gate_vma)) {
2005 		unsigned long addr;
2006 		unsigned long end;
2007 
2008 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2009 
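		/*
		 * Dump each page of the vma: pages that cannot be fetched and
		 * the shared zero page become holes (seeks) in the core file
		 * rather than written data, and exceeding the core size limit
		 * ends the dump.
		 */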
2010 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2011 			struct page *page;
2012 			struct vm_area_struct *tmp_vma;
2013 
2014 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2015 						&page, &tmp_vma) <= 0) {
2016 				DUMP_SEEK(PAGE_SIZE);
2017 			} else {
2018 				if (page == ZERO_PAGE(0)) {
2019 					if (!dump_seek(file, PAGE_SIZE)) {
2020 						page_cache_release(page);
2021 						goto end_coredump;
2022 					}
2023 				} else {
2024 					void *kaddr;
2025 					flush_cache_page(tmp_vma, addr,
2026 							 page_to_pfn(page));
2027 					kaddr = kmap(page);
2028 					if ((size += PAGE_SIZE) > limit ||
2029 					    !dump_write(file, kaddr,
2030 					    PAGE_SIZE)) {
2031 						kunmap(page);
2032 						page_cache_release(page);
2033 						goto end_coredump;
2034 					}
2035 					kunmap(page);
2036 				}
2037 				page_cache_release(page);
2038 			}
2039 		}
2040 	}
2041 
2042 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2043 	ELF_CORE_WRITE_EXTRA_DATA;
2044 #endif
2045 
2046 end_coredump:
2047 	set_fs(fs);
2048 
2049 cleanup:
2050 	free_note_info(&info);
2051 	kfree(elf);
2052 out:
2053 	return has_dumped;
2054 }
2055 
2056 #endif		/* USE_ELF_CORE_DUMP */
2057 
2058 static int __init init_elf_binfmt(void)
2059 {
2060 	return register_binfmt(&elf_format);
2061 }
2062 
2063 static void __exit exit_elf_binfmt(void)
2064 {
2065 	/* Remove the ELF loader. */
2066 	unregister_binfmt(&elf_format);
2067 }
2068 
2069 core_initcall(init_elf_binfmt);
2070 module_exit(exit_elf_binfmt);
2071 MODULE_LICENSE("GPL");
2072