xref: /openbmc/linux/fs/binfmt_elf.c (revision f42b3800)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45 
46 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 static int load_elf_library(struct file *);
48 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
49 				int, int, unsigned long);
50 
51 /*
52  * If we don't support core dumping, then supply a NULL so we
53  * don't even try.
54  */
55 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
56 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
57 #else
58 #define elf_core_dump	NULL
59 #endif
60 
61 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
62 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
63 #else
64 #define ELF_MIN_ALIGN	PAGE_SIZE
65 #endif
66 
67 #ifndef ELF_CORE_EFLAGS
68 #define ELF_CORE_EFLAGS	0
69 #endif
70 
71 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
72 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
73 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
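/*
 * Worked example of the page rounding helpers above, assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000   round down to the page base
 *   ELF_PAGEOFFSET(0x12345) == 0x345     offset within that page
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   round up to the next page
 */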
74 
75 static struct linux_binfmt elf_format = {
76 		.module		= THIS_MODULE,
77 		.load_binary	= load_elf_binary,
78 		.load_shlib	= load_elf_library,
79 		.core_dump	= elf_core_dump,
80 		.min_coredump	= ELF_EXEC_PAGESIZE,
81 		.hasvdso	= 1
82 };
83 
84 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
85 
86 static int set_brk(unsigned long start, unsigned long end)
87 {
88 	start = ELF_PAGEALIGN(start);
89 	end = ELF_PAGEALIGN(end);
90 	if (end > start) {
91 		unsigned long addr;
92 		down_write(&current->mm->mmap_sem);
93 		addr = do_brk(start, end - start);
94 		up_write(&current->mm->mmap_sem);
95 		if (BAD_ADDR(addr))
96 			return addr;
97 	}
98 	current->mm->start_brk = current->mm->brk = end;
99 	return 0;
100 }
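/*
 * Note on set_brk() above: do_brk() creates an anonymous, zero-filled
 * mapping covering the page-aligned [start, end) range, and on success
 * both start_brk and brk are set to the aligned end, so later brk()
 * calls from userspace grow the heap from there.
 */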
101 
102 /* We need to explicitly zero any fractional pages
103    after the data section (i.e. bss).  These pages would
104    otherwise contain junk from the file that should not
105    be in memory.
106  */
107 static int padzero(unsigned long elf_bss)
108 {
109 	unsigned long nbyte;
110 
111 	nbyte = ELF_PAGEOFFSET(elf_bss);
112 	if (nbyte) {
113 		nbyte = ELF_MIN_ALIGN - nbyte;
114 		if (clear_user((void __user *) elf_bss, nbyte))
115 			return -EFAULT;
116 	}
117 	return 0;
118 }
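/*
 * Example for padzero(): with ELF_MIN_ALIGN == 4096, a bss starting at
 * 0x0804a123 leaves ELF_PAGEOFFSET() == 0x123, so clear_user() zeroes
 * the remaining 0xedd bytes of that page, i.e. [0x0804a123, 0x0804b000).
 * The addresses are illustrative only.
 */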
119 
120 /* Let's use some macros to make this stack manipulation a little clearer */
121 #ifdef CONFIG_STACK_GROWSUP
122 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
123 #define STACK_ROUND(sp, items) \
124 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
125 #define STACK_ALLOC(sp, len) ({ \
126 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
127 	old_sp; })
128 #else
129 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
130 #define STACK_ROUND(sp, items) \
131 	(((unsigned long) (sp - items)) &~ 15UL)
132 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
133 #endif
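/*
 * In short: STACK_ADD() moves a stack pointer by a number of elf_addr_t
 * slots toward the argument area, STACK_ROUND() keeps the final stack
 * pointer 16-byte aligned, and STACK_ALLOC() carves out 'len' bytes for
 * raw data (e.g. the platform string below) and yields the start of the
 * carved region, regardless of which way the stack grows.
 */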
134 
135 static int
136 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
137 		unsigned long load_addr, unsigned long interp_load_addr)
138 {
139 	unsigned long p = bprm->p;
140 	int argc = bprm->argc;
141 	int envc = bprm->envc;
142 	elf_addr_t __user *argv;
143 	elf_addr_t __user *envp;
144 	elf_addr_t __user *sp;
145 	elf_addr_t __user *u_platform;
146 	const char *k_platform = ELF_PLATFORM;
147 	int items;
148 	elf_addr_t *elf_info;
149 	int ei_index = 0;
150 	struct task_struct *tsk = current;
151 	struct vm_area_struct *vma;
152 
153 	/*
154 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
155 	 * evictions by the processes running on the same package. One
156 	 * thing we can do is to shuffle the initial stack for them.
157 	 */
158 
159 	p = arch_align_stack(p);
160 
161 	/*
162 	 * If this architecture has a platform capability string, copy it
163 	 * to userspace.  In some cases (Sparc), this info is impossible
164 	 * for userspace to get any other way, in others (i386) it is
165 	 * merely difficult.
166 	 */
167 	u_platform = NULL;
168 	if (k_platform) {
169 		size_t len = strlen(k_platform) + 1;
170 
171 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
172 		if (__copy_to_user(u_platform, k_platform, len))
173 			return -EFAULT;
174 	}
175 
176 	/* Create the ELF interpreter info */
177 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
178 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
179 #define NEW_AUX_ENT(id, val) \
180 	do { \
181 		elf_info[ei_index++] = id; \
182 		elf_info[ei_index++] = val; \
183 	} while (0)
184 
185 #ifdef ARCH_DLINFO
186 	/*
187 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
188 	 * AUXV.
189 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
190 	 * ARCH_DLINFO changes
191 	 */
192 	ARCH_DLINFO;
193 #endif
194 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
195 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
199 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 	NEW_AUX_ENT(AT_FLAGS, 0);
202 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 	NEW_AUX_ENT(AT_UID, tsk->uid);
204 	NEW_AUX_ENT(AT_EUID, tsk->euid);
205 	NEW_AUX_ENT(AT_GID, tsk->gid);
206 	NEW_AUX_ENT(AT_EGID, tsk->egid);
207  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
208 	if (k_platform) {
209 		NEW_AUX_ENT(AT_PLATFORM,
210 			    (elf_addr_t)(unsigned long)u_platform);
211 	}
212 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
213 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
214 	}
215 #undef NEW_AUX_ENT
216 	/* AT_NULL is zero; clear the rest too */
217 	memset(&elf_info[ei_index], 0,
218 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
219 
220 	/* And advance past the AT_NULL entry.  */
221 	ei_index += 2;
222 
223 	sp = STACK_ADD(p, ei_index);
224 
225 	items = (argc + 1) + (envc + 1) + 1;
226 	bprm->p = STACK_ROUND(sp, items);
227 
228 	/* Point sp at the lowest address on the stack */
229 #ifdef CONFIG_STACK_GROWSUP
230 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
231 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
232 #else
233 	sp = (elf_addr_t __user *)bprm->p;
234 #endif
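	/*
	 * From here on the new stack is filled in so that, from sp (the
	 * lowest address) upward, the process sees:
	 *
	 *   argc
	 *   argv[0] ... argv[argc-1], NULL
	 *   envp[0] ... envp[envc-1], NULL
	 *   auxv type/value pairs, terminated by an AT_NULL pair
	 *
	 * with the argument/environment strings and the platform string
	 * already copied higher up by this point.
	 */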
235 
236 
237 	/*
238 	 * Grow the stack manually; some architectures have a limit on how
239 	 * far ahead a user-space access may be in order to grow the stack.
240 	 */
241 	vma = find_extend_vma(current->mm, bprm->p);
242 	if (!vma)
243 		return -EFAULT;
244 
245 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
246 	if (__put_user(argc, sp++))
247 		return -EFAULT;
248 	argv = sp;
249 	envp = argv + argc + 1;
250 
251 	/* Populate argv and envp */
252 	p = current->mm->arg_end = current->mm->arg_start;
253 	while (argc-- > 0) {
254 		size_t len;
255 		if (__put_user((elf_addr_t)p, argv++))
256 			return -EFAULT;
257 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
258 		if (!len || len > MAX_ARG_STRLEN)
259 			return -EINVAL;
260 		p += len;
261 	}
262 	if (__put_user(0, argv))
263 		return -EFAULT;
264 	current->mm->arg_end = current->mm->env_start = p;
265 	while (envc-- > 0) {
266 		size_t len;
267 		if (__put_user((elf_addr_t)p, envp++))
268 			return -EFAULT;
269 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
270 		if (!len || len > MAX_ARG_STRLEN)
271 			return -EINVAL;
272 		p += len;
273 	}
274 	if (__put_user(0, envp))
275 		return -EFAULT;
276 	current->mm->env_end = p;
277 
278 	/* Put the elf_info on the stack in the right place.  */
279 	sp = (elf_addr_t __user *)envp + 1;
280 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 		return -EFAULT;
282 	return 0;
283 }
284 
285 #ifndef elf_map
286 
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288 		struct elf_phdr *eppnt, int prot, int type,
289 		unsigned long total_size)
290 {
291 	unsigned long map_addr;
292 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
293 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
294 	addr = ELF_PAGESTART(addr);
295 	size = ELF_PAGEALIGN(size);
296 
297 	/* mmap() will return -EINVAL if given a zero size, but a
298 	 * segment with zero filesize is perfectly valid */
299 	if (!size)
300 		return addr;
301 
302 	down_write(&current->mm->mmap_sem);
303 	/*
304 	 * total_size is the size of the ELF (interpreter) image.
305 	 * The _first_ mmap needs to know the full size, otherwise
306 	 * randomization might put this image into an overlapping
307 	 * position with the ELF binary image (since size < total_size).
308 	 * So we first map the 'big' image - and unmap the remainder at
309 	 * the end (that unmap is needed for ELF images with holes).
310 	 */
311 	if (total_size) {
312 		total_size = ELF_PAGEALIGN(total_size);
313 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
314 		if (!BAD_ADDR(map_addr))
315 			do_munmap(current->mm, map_addr+size, total_size-size);
316 	} else
317 		map_addr = do_mmap(filep, addr, size, prot, type, off);
318 
319 	up_write(&current->mm->mmap_sem);
320 	return(map_addr);
321 }
322 
323 #endif /* !elf_map */
324 
325 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
326 {
327 	int i, first_idx = -1, last_idx = -1;
328 
329 	for (i = 0; i < nr; i++) {
330 		if (cmds[i].p_type == PT_LOAD) {
331 			last_idx = i;
332 			if (first_idx == -1)
333 				first_idx = i;
334 		}
335 	}
336 	if (first_idx == -1)
337 		return 0;
338 
339 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
340 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
341 }
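/*
 * Example for total_mapping_size(): for two PT_LOAD entries with
 * p_vaddr/p_memsz of 0x400000/0x800 and 0x600e10/0x430 (illustrative
 * values), the result is 0x600e10 + 0x430 - ELF_PAGESTART(0x400000)
 * == 0x201240, i.e. the span the first mmap must reserve.
 */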
342 
343 
344 /* This is much more generalized than the library routine read function,
345    so we keep this separate.  Technically the library read function
346    is only provided so that we can read a.out libraries that have
347    an ELF header. */
348 
349 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
350 		struct file *interpreter, unsigned long *interp_map_addr,
351 		unsigned long no_base)
352 {
353 	struct elf_phdr *elf_phdata;
354 	struct elf_phdr *eppnt;
355 	unsigned long load_addr = 0;
356 	int load_addr_set = 0;
357 	unsigned long last_bss = 0, elf_bss = 0;
358 	unsigned long error = ~0UL;
359 	unsigned long total_size;
360 	int retval, i, size;
361 
362 	/* First of all, some simple consistency checks */
363 	if (interp_elf_ex->e_type != ET_EXEC &&
364 	    interp_elf_ex->e_type != ET_DYN)
365 		goto out;
366 	if (!elf_check_arch(interp_elf_ex))
367 		goto out;
368 	if (!interpreter->f_op || !interpreter->f_op->mmap)
369 		goto out;
370 
371 	/*
372 	 * If the size of this structure has changed, then punt, since
373 	 * we will be doing the wrong thing.
374 	 */
375 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
376 		goto out;
377 	if (interp_elf_ex->e_phnum < 1 ||
378 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
379 		goto out;
380 
381 	/* Now read in all of the header information */
382 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
383 	if (size > ELF_MIN_ALIGN)
384 		goto out;
385 	elf_phdata = kmalloc(size, GFP_KERNEL);
386 	if (!elf_phdata)
387 		goto out;
388 
389 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
390 			     (char *)elf_phdata,size);
391 	error = -EIO;
392 	if (retval != size) {
393 		if (retval < 0)
394 			error = retval;
395 		goto out_close;
396 	}
397 
398 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
399 	if (!total_size) {
400 		error = -EINVAL;
401 		goto out_close;
402 	}
403 
404 	eppnt = elf_phdata;
405 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
406 		if (eppnt->p_type == PT_LOAD) {
407 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
408 			int elf_prot = 0;
409 			unsigned long vaddr = 0;
410 			unsigned long k, map_addr;
411 
412 			if (eppnt->p_flags & PF_R)
413 				elf_prot = PROT_READ;
414 			if (eppnt->p_flags & PF_W)
415 				elf_prot |= PROT_WRITE;
416 			if (eppnt->p_flags & PF_X)
417 				elf_prot |= PROT_EXEC;
418 			vaddr = eppnt->p_vaddr;
419 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
420 				elf_type |= MAP_FIXED;
421 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
422 				load_addr = -vaddr;
423 
424 			map_addr = elf_map(interpreter, load_addr + vaddr,
425 					eppnt, elf_prot, elf_type, total_size);
426 			total_size = 0;
427 			if (!*interp_map_addr)
428 				*interp_map_addr = map_addr;
429 			error = map_addr;
430 			if (BAD_ADDR(map_addr))
431 				goto out_close;
432 
433 			if (!load_addr_set &&
434 			    interp_elf_ex->e_type == ET_DYN) {
435 				load_addr = map_addr - ELF_PAGESTART(vaddr);
436 				load_addr_set = 1;
437 			}
438 
439 			/*
440 			 * Check to see if the section's size will overflow the
441 			 * allowed task size. Note that p_filesz must always be
442 			 * <= p_memsz so it's only necessary to check p_memsz.
443 			 */
444 			k = load_addr + eppnt->p_vaddr;
445 			if (BAD_ADDR(k) ||
446 			    eppnt->p_filesz > eppnt->p_memsz ||
447 			    eppnt->p_memsz > TASK_SIZE ||
448 			    TASK_SIZE - eppnt->p_memsz < k) {
449 				error = -ENOMEM;
450 				goto out_close;
451 			}
452 
453 			/*
454 			 * Find the end of the file mapping for this phdr, and
455 			 * keep track of the largest address we see for this.
456 			 */
457 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
458 			if (k > elf_bss)
459 				elf_bss = k;
460 
461 			/*
462 			 * Do the same thing for the memory mapping - between
463 			 * elf_bss and last_bss is the bss section.
464 			 */
465 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
466 			if (k > last_bss)
467 				last_bss = k;
468 		}
469 	}
470 
471 	/*
472 	 * Now fill out the bss section.  First pad the last page up
473 	 * to the page boundary, and then perform a mmap to make sure
474 	 * that there are zero-mapped pages up to and including the
475 	 * last bss page.
476 	 */
477 	if (padzero(elf_bss)) {
478 		error = -EFAULT;
479 		goto out_close;
480 	}
481 
482 	/* What we have mapped so far */
483 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
484 
485 	/* Map the last of the bss segment */
486 	if (last_bss > elf_bss) {
487 		down_write(&current->mm->mmap_sem);
488 		error = do_brk(elf_bss, last_bss - elf_bss);
489 		up_write(&current->mm->mmap_sem);
490 		if (BAD_ADDR(error))
491 			goto out_close;
492 	}
493 
494 	error = load_addr;
495 
496 out_close:
497 	kfree(elf_phdata);
498 out:
499 	return error;
500 }
501 
502 /*
503  * These are the functions used to load ELF style executables and shared
504  * libraries.  There is no binary dependent code anywhere else.
505  */
506 
507 #define INTERPRETER_NONE 0
508 #define INTERPRETER_ELF 2
509 
510 #ifndef STACK_RND_MASK
511 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
512 #endif
513 
514 static unsigned long randomize_stack_top(unsigned long stack_top)
515 {
516 	unsigned int random_variable = 0;
517 
518 	if ((current->flags & PF_RANDOMIZE) &&
519 		!(current->personality & ADDR_NO_RANDOMIZE)) {
520 		random_variable = get_random_int() & STACK_RND_MASK;
521 		random_variable <<= PAGE_SHIFT;
522 	}
523 #ifdef CONFIG_STACK_GROWSUP
524 	return PAGE_ALIGN(stack_top) + random_variable;
525 #else
526 	return PAGE_ALIGN(stack_top) - random_variable;
527 #endif
528 }
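/*
 * With 4 KiB pages the default STACK_RND_MASK is 0x7ff, so the stack
 * top above is shifted by up to 0x7ff pages (just under 8 MiB), added
 * for upward-growing stacks and subtracted otherwise.
 */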
529 
530 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
531 {
532 	struct file *interpreter = NULL; /* to shut gcc up */
533  	unsigned long load_addr = 0, load_bias = 0;
534 	int load_addr_set = 0;
535 	char * elf_interpreter = NULL;
536 	unsigned long error;
537 	struct elf_phdr *elf_ppnt, *elf_phdata;
538 	unsigned long elf_bss, elf_brk;
539 	int elf_exec_fileno;
540 	int retval, i;
541 	unsigned int size;
542 	unsigned long elf_entry;
543 	unsigned long interp_load_addr = 0;
544 	unsigned long start_code, end_code, start_data, end_data;
545 	unsigned long reloc_func_desc = 0;
546 	struct files_struct *files;
547 	int executable_stack = EXSTACK_DEFAULT;
548 	unsigned long def_flags = 0;
549 	struct {
550 		struct elfhdr elf_ex;
551 		struct elfhdr interp_elf_ex;
552   		struct exec interp_ex;
553 	} *loc;
554 
555 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
556 	if (!loc) {
557 		retval = -ENOMEM;
558 		goto out_ret;
559 	}
560 
561 	/* Get the exec-header */
562 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
563 
564 	retval = -ENOEXEC;
565 	/* First of all, some simple consistency checks */
566 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
567 		goto out;
568 
569 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
570 		goto out;
571 	if (!elf_check_arch(&loc->elf_ex))
572 		goto out;
573 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
574 		goto out;
575 
576 	/* Now read in all of the header information */
577 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
578 		goto out;
579 	if (loc->elf_ex.e_phnum < 1 ||
580 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
581 		goto out;
582 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
583 	retval = -ENOMEM;
584 	elf_phdata = kmalloc(size, GFP_KERNEL);
585 	if (!elf_phdata)
586 		goto out;
587 
588 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
589 			     (char *)elf_phdata, size);
590 	if (retval != size) {
591 		if (retval >= 0)
592 			retval = -EIO;
593 		goto out_free_ph;
594 	}
595 
596 	files = current->files;	/* Refcounted so ok */
597 	retval = unshare_files();
598 	if (retval < 0)
599 		goto out_free_ph;
600 	if (files == current->files) {
601 		put_files_struct(files);
602 		files = NULL;
603 	}
604 
605 	/* exec will make our files private anyway, but for the a.out
606 	   loader stuff we need to do it earlier */
607 	retval = get_unused_fd();
608 	if (retval < 0)
609 		goto out_free_fh;
610 	get_file(bprm->file);
611 	fd_install(elf_exec_fileno = retval, bprm->file);
612 
613 	elf_ppnt = elf_phdata;
614 	elf_bss = 0;
615 	elf_brk = 0;
616 
617 	start_code = ~0UL;
618 	end_code = 0;
619 	start_data = 0;
620 	end_data = 0;
621 
622 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
623 		if (elf_ppnt->p_type == PT_INTERP) {
624 			/* This is the program interpreter used for
625 			 * shared libraries - for now assume that this
626 			 * is an a.out format binary
627 			 */
628 			retval = -ENOEXEC;
629 			if (elf_ppnt->p_filesz > PATH_MAX ||
630 			    elf_ppnt->p_filesz < 2)
631 				goto out_free_file;
632 
633 			retval = -ENOMEM;
634 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
635 						  GFP_KERNEL);
636 			if (!elf_interpreter)
637 				goto out_free_file;
638 
639 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
640 					     elf_interpreter,
641 					     elf_ppnt->p_filesz);
642 			if (retval != elf_ppnt->p_filesz) {
643 				if (retval >= 0)
644 					retval = -EIO;
645 				goto out_free_interp;
646 			}
647 			/* make sure path is NULL terminated */
648 			/* make sure path is NUL terminated */
649 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
650 				goto out_free_interp;
651 
652 			/*
653 			 * The early SET_PERSONALITY here is so that the lookup
654 			 * for the interpreter happens in the namespace of the
655 			 * to-be-execed image.  SET_PERSONALITY can select an
656 			 * alternate root.
657 			 *
658 			 * However, SET_PERSONALITY is NOT allowed to switch
659 			 * this task into the new image's memory mapping
660 			 * policy - that is, TASK_SIZE must still evaluate to
661 			 * that which is appropriate to the execing application.
662 			 * This is because exit_mmap() needs to have TASK_SIZE
663 			 * evaluate to the size of the old image.
664 			 *
665 			 * So if (say) a 64-bit application is execing a 32-bit
666 			 * application it is the architecture's responsibility
667 			 * to defer changing the value of TASK_SIZE until the
668 			 * switch really is going to happen - do this in
669 			 * flush_thread().	- akpm
670 			 */
671 			SET_PERSONALITY(loc->elf_ex, 0);
672 
673 			interpreter = open_exec(elf_interpreter);
674 			retval = PTR_ERR(interpreter);
675 			if (IS_ERR(interpreter))
676 				goto out_free_interp;
677 
678 			/*
679 			 * If the binary is not readable then enforce
680 			 * mm->dumpable = 0 regardless of the interpreter's
681 			 * permissions.
682 			 */
683 			if (file_permission(interpreter, MAY_READ) < 0)
684 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
685 
686 			retval = kernel_read(interpreter, 0, bprm->buf,
687 					     BINPRM_BUF_SIZE);
688 			if (retval != BINPRM_BUF_SIZE) {
689 				if (retval >= 0)
690 					retval = -EIO;
691 				goto out_free_dentry;
692 			}
693 
694 			/* Get the exec headers */
695 			loc->interp_ex = *((struct exec *)bprm->buf);
696 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
697 			break;
698 		}
699 		elf_ppnt++;
700 	}
701 
702 	elf_ppnt = elf_phdata;
703 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
704 		if (elf_ppnt->p_type == PT_GNU_STACK) {
705 			if (elf_ppnt->p_flags & PF_X)
706 				executable_stack = EXSTACK_ENABLE_X;
707 			else
708 				executable_stack = EXSTACK_DISABLE_X;
709 			break;
710 		}
711 
712 	/* Some simple consistency checks for the interpreter */
713 	if (elf_interpreter) {
714 		retval = -ELIBBAD;
715 		/* Not an ELF interpreter */
716 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
717 			goto out_free_dentry;
718 		/* Verify the interpreter has a valid arch */
719 		if (!elf_check_arch(&loc->interp_elf_ex))
720 			goto out_free_dentry;
721 	} else {
722 		/* Executables without an interpreter also need a personality  */
723 		SET_PERSONALITY(loc->elf_ex, 0);
724 	}
725 
726 	/* Flush all traces of the currently running executable */
727 	retval = flush_old_exec(bprm);
728 	if (retval)
729 		goto out_free_dentry;
730 
731 	/* Discard our unneeded old files struct */
732 	if (files) {
733 		put_files_struct(files);
734 		files = NULL;
735 	}
736 
737 	/* OK, This is the point of no return */
738 	current->flags &= ~PF_FORKNOEXEC;
739 	current->mm->def_flags = def_flags;
740 
741 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
742 	   may depend on the personality.  */
743 	SET_PERSONALITY(loc->elf_ex, 0);
744 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
745 		current->personality |= READ_IMPLIES_EXEC;
746 
747 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
748 		current->flags |= PF_RANDOMIZE;
749 	arch_pick_mmap_layout(current->mm);
750 
751 	/* Do this so that we can load the interpreter, if need be.  We will
752 	   change some of these later */
753 	current->mm->free_area_cache = current->mm->mmap_base;
754 	current->mm->cached_hole_size = 0;
755 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
756 				 executable_stack);
757 	if (retval < 0) {
758 		send_sig(SIGKILL, current, 0);
759 		goto out_free_dentry;
760 	}
761 
762 	current->mm->start_stack = bprm->p;
763 
764 	/* Now we do a little grungy work by mmaping the ELF image into
765 	   the correct location in memory. */
766 	for(i = 0, elf_ppnt = elf_phdata;
767 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
768 		int elf_prot = 0, elf_flags;
769 		unsigned long k, vaddr;
770 
771 		if (elf_ppnt->p_type != PT_LOAD)
772 			continue;
773 
774 		if (unlikely (elf_brk > elf_bss)) {
775 			unsigned long nbyte;
776 
777 			/* There was a PT_LOAD segment with p_memsz > p_filesz
778 			   before this one. Map anonymous pages, if needed,
779 			   and clear the area.  */
780 			retval = set_brk (elf_bss + load_bias,
781 					  elf_brk + load_bias);
782 			if (retval) {
783 				send_sig(SIGKILL, current, 0);
784 				goto out_free_dentry;
785 			}
786 			nbyte = ELF_PAGEOFFSET(elf_bss);
787 			if (nbyte) {
788 				nbyte = ELF_MIN_ALIGN - nbyte;
789 				if (nbyte > elf_brk - elf_bss)
790 					nbyte = elf_brk - elf_bss;
791 				if (clear_user((void __user *)elf_bss +
792 							load_bias, nbyte)) {
793 					/*
794 					 * This bss-zeroing can fail if the ELF
795 					 * file specifies odd protections. So
796 					 * we don't check the return value
797 					 */
798 				}
799 			}
800 		}
801 
802 		if (elf_ppnt->p_flags & PF_R)
803 			elf_prot |= PROT_READ;
804 		if (elf_ppnt->p_flags & PF_W)
805 			elf_prot |= PROT_WRITE;
806 		if (elf_ppnt->p_flags & PF_X)
807 			elf_prot |= PROT_EXEC;
808 
809 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
810 
811 		vaddr = elf_ppnt->p_vaddr;
812 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
813 			elf_flags |= MAP_FIXED;
814 		} else if (loc->elf_ex.e_type == ET_DYN) {
815 			/* Try and get dynamic programs out of the way of the
816 			 * default mmap base, as well as whatever program they
817 			 * might try to exec.  This is because the brk will
818 			 * follow the loader, and is not movable.  */
819 #ifdef CONFIG_X86
820 			load_bias = 0;
821 #else
822 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
823 #endif
824 		}
825 
826 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
827 				elf_prot, elf_flags, 0);
828 		if (BAD_ADDR(error)) {
829 			send_sig(SIGKILL, current, 0);
830 			retval = IS_ERR((void *)error) ?
831 				PTR_ERR((void*)error) : -EINVAL;
832 			goto out_free_dentry;
833 		}
834 
835 		if (!load_addr_set) {
836 			load_addr_set = 1;
837 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
838 			if (loc->elf_ex.e_type == ET_DYN) {
839 				load_bias += error -
840 				             ELF_PAGESTART(load_bias + vaddr);
841 				load_addr += load_bias;
842 				reloc_func_desc = load_bias;
843 			}
844 		}
845 		k = elf_ppnt->p_vaddr;
846 		if (k < start_code)
847 			start_code = k;
848 		if (start_data < k)
849 			start_data = k;
850 
851 		/*
852 		 * Check to see if the section's size will overflow the
853 		 * allowed task size. Note that p_filesz must always be
854 		 * <= p_memsz so it is only necessary to check p_memsz.
855 		 */
856 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
857 		    elf_ppnt->p_memsz > TASK_SIZE ||
858 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
859 			/* set_brk can never work. Avoid overflows. */
860 			send_sig(SIGKILL, current, 0);
861 			retval = -EINVAL;
862 			goto out_free_dentry;
863 		}
864 
865 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
866 
867 		if (k > elf_bss)
868 			elf_bss = k;
869 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
870 			end_code = k;
871 		if (end_data < k)
872 			end_data = k;
873 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
874 		if (k > elf_brk)
875 			elf_brk = k;
876 	}
877 
878 	loc->elf_ex.e_entry += load_bias;
879 	elf_bss += load_bias;
880 	elf_brk += load_bias;
881 	start_code += load_bias;
882 	end_code += load_bias;
883 	start_data += load_bias;
884 	end_data += load_bias;
885 
886 	/* Calling set_brk effectively mmaps the pages that we need
887 	 * for the bss and break sections.  We must do this before
888 	 * mapping in the interpreter, to make sure it doesn't wind
889 	 * up getting placed where the bss needs to go.
890 	 */
891 	retval = set_brk(elf_bss, elf_brk);
892 	if (retval) {
893 		send_sig(SIGKILL, current, 0);
894 		goto out_free_dentry;
895 	}
896 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
897 		send_sig(SIGSEGV, current, 0);
898 		retval = -EFAULT; /* Nobody gets to see this, but.. */
899 		goto out_free_dentry;
900 	}
901 
902 	if (elf_interpreter) {
903 		unsigned long uninitialized_var(interp_map_addr);
904 
905 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
906 					    interpreter,
907 					    &interp_map_addr,
908 					    load_bias);
909 		if (!IS_ERR((void *)elf_entry)) {
910 			/*
911 			 * load_elf_interp() returns relocation
912 			 * adjustment
913 			 */
914 			interp_load_addr = elf_entry;
915 			elf_entry += loc->interp_elf_ex.e_entry;
916 		}
917 		if (BAD_ADDR(elf_entry)) {
918 			force_sig(SIGSEGV, current);
919 			retval = IS_ERR((void *)elf_entry) ?
920 					(int)elf_entry : -EINVAL;
921 			goto out_free_dentry;
922 		}
923 		reloc_func_desc = interp_load_addr;
924 
925 		allow_write_access(interpreter);
926 		fput(interpreter);
927 		kfree(elf_interpreter);
928 	} else {
929 		elf_entry = loc->elf_ex.e_entry;
930 		if (BAD_ADDR(elf_entry)) {
931 			force_sig(SIGSEGV, current);
932 			retval = -EINVAL;
933 			goto out_free_dentry;
934 		}
935 	}
936 
937 	kfree(elf_phdata);
938 
939 	sys_close(elf_exec_fileno);
940 
941 	set_binfmt(&elf_format);
942 
943 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
944 	retval = arch_setup_additional_pages(bprm, executable_stack);
945 	if (retval < 0) {
946 		send_sig(SIGKILL, current, 0);
947 		goto out;
948 	}
949 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
950 
951 	compute_creds(bprm);
952 	current->flags &= ~PF_FORKNOEXEC;
953 	retval = create_elf_tables(bprm, &loc->elf_ex,
954 			  load_addr, interp_load_addr);
955 	if (retval < 0) {
956 		send_sig(SIGKILL, current, 0);
957 		goto out;
958 	}
959 	/* N.B. passed_fileno might not be initialized? */
960 	current->mm->end_code = end_code;
961 	current->mm->start_code = start_code;
962 	current->mm->start_data = start_data;
963 	current->mm->end_data = end_data;
964 	current->mm->start_stack = bprm->p;
965 
966 #ifdef arch_randomize_brk
967 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
968 		current->mm->brk = current->mm->start_brk =
969 			arch_randomize_brk(current->mm);
970 #endif
971 
972 	if (current->personality & MMAP_PAGE_ZERO) {
973 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
974 		   and some applications "depend" upon this behavior.
975 		   Since we do not have the power to recompile these, we
976 		   emulate the SVr4 behavior. Sigh. */
977 		down_write(&current->mm->mmap_sem);
978 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
979 				MAP_FIXED | MAP_PRIVATE, 0);
980 		up_write(&current->mm->mmap_sem);
981 	}
982 
983 #ifdef ELF_PLAT_INIT
984 	/*
985 	 * The ABI may specify that certain registers be set up in special
986 	 * ways (on i386 %edx is the address of a DT_FINI function, for
987 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
988 	 * that the e_entry field is the address of the function descriptor
989 	 * for the startup routine, rather than the address of the startup
990 	 * routine itself.  This macro performs whatever initialization to
991 	 * the regs structure is required as well as any relocations to the
992 	 * function descriptor entries when executing dynamically linked apps.
993 	 */
994 	ELF_PLAT_INIT(regs, reloc_func_desc);
995 #endif
996 
997 	start_thread(regs, elf_entry, bprm->p);
998 	if (unlikely(current->ptrace & PT_PTRACED)) {
999 		if (current->ptrace & PT_TRACE_EXEC)
1000 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1001 		else
1002 			send_sig(SIGTRAP, current, 0);
1003 	}
1004 	retval = 0;
1005 out:
1006 	kfree(loc);
1007 out_ret:
1008 	return retval;
1009 
1010 	/* error cleanup */
1011 out_free_dentry:
1012 	allow_write_access(interpreter);
1013 	if (interpreter)
1014 		fput(interpreter);
1015 out_free_interp:
1016 	kfree(elf_interpreter);
1017 out_free_file:
1018 	sys_close(elf_exec_fileno);
1019 out_free_fh:
1020 	if (files)
1021 		reset_files_struct(current, files);
1022 out_free_ph:
1023 	kfree(elf_phdata);
1024 	goto out;
1025 }
1026 
1027 /* This is really simpleminded and specialized - we are loading an
1028    a.out library that has an ELF header. */
1029 static int load_elf_library(struct file *file)
1030 {
1031 	struct elf_phdr *elf_phdata;
1032 	struct elf_phdr *eppnt;
1033 	unsigned long elf_bss, bss, len;
1034 	int retval, error, i, j;
1035 	struct elfhdr elf_ex;
1036 
1037 	error = -ENOEXEC;
1038 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1039 	if (retval != sizeof(elf_ex))
1040 		goto out;
1041 
1042 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1043 		goto out;
1044 
1045 	/* First of all, some simple consistency checks */
1046 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1047 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1048 		goto out;
1049 
1050 	/* Now read in all of the header information */
1051 
1052 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1053 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1054 
1055 	error = -ENOMEM;
1056 	elf_phdata = kmalloc(j, GFP_KERNEL);
1057 	if (!elf_phdata)
1058 		goto out;
1059 
1060 	eppnt = elf_phdata;
1061 	error = -ENOEXEC;
1062 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1063 	if (retval != j)
1064 		goto out_free_ph;
1065 
1066 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1067 		if ((eppnt + i)->p_type == PT_LOAD)
1068 			j++;
1069 	if (j != 1)
1070 		goto out_free_ph;
1071 
1072 	while (eppnt->p_type != PT_LOAD)
1073 		eppnt++;
1074 
1075 	/* Now use mmap to map the library into memory. */
1076 	down_write(&current->mm->mmap_sem);
1077 	error = do_mmap(file,
1078 			ELF_PAGESTART(eppnt->p_vaddr),
1079 			(eppnt->p_filesz +
1080 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1081 			PROT_READ | PROT_WRITE | PROT_EXEC,
1082 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1083 			(eppnt->p_offset -
1084 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1085 	up_write(&current->mm->mmap_sem);
1086 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1087 		goto out_free_ph;
1088 
1089 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1090 	if (padzero(elf_bss)) {
1091 		error = -EFAULT;
1092 		goto out_free_ph;
1093 	}
1094 
1095 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1096 			    ELF_MIN_ALIGN - 1);
1097 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1098 	if (bss > len) {
1099 		down_write(&current->mm->mmap_sem);
1100 		do_brk(len, bss - len);
1101 		up_write(&current->mm->mmap_sem);
1102 	}
1103 	error = 0;
1104 
1105 out_free_ph:
1106 	kfree(elf_phdata);
1107 out:
1108 	return error;
1109 }
1110 
1111 /*
1112  * Note that some platforms still use traditional core dumps and not
1113  * the ELF core dump.  Each platform can select it as appropriate.
1114  */
1115 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1116 
1117 /*
1118  * ELF core dumper
1119  *
1120  * Modelled on fs/exec.c:aout_core_dump()
1121  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1122  */
1123 /*
1124  * These are the only things you should do on a core-file: use only these
1125  * functions to write out all the necessary info.
1126  */
1127 static int dump_write(struct file *file, const void *addr, int nr)
1128 {
1129 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1130 }
1131 
1132 static int dump_seek(struct file *file, loff_t off)
1133 {
1134 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1135 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1136 			return 0;
1137 	} else {
1138 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1139 		if (!buf)
1140 			return 0;
1141 		while (off > 0) {
1142 			unsigned long n = off;
1143 			if (n > PAGE_SIZE)
1144 				n = PAGE_SIZE;
1145 			if (!dump_write(file, buf, n))
1146 				return 0;
1147 			off -= n;
1148 		}
1149 		free_page((unsigned long)buf);
1150 	}
1151 	return 1;
1152 }
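/*
 * dump_seek() behaviour in brief: if the core file supports a real
 * llseek() the skipped span simply becomes a hole (a sparse core file);
 * otherwise a zeroed page is written repeatedly to cover the same
 * distance.  Like dump_write(), it returns 1 on success and 0 on error.
 */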
1153 
1154 /*
1155  * Decide what to dump of a segment: part, all, or none.
1156  */
1157 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1158 				   unsigned long mm_flags)
1159 {
1160 	/* The vma can be set up to tell us the answer directly.  */
1161 	if (vma->vm_flags & VM_ALWAYSDUMP)
1162 		goto whole;
1163 
1164 	/* Do not dump I/O mapped devices or special mappings */
1165 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1166 		return 0;
1167 
1168 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1169 
1170 	/* By default, dump shared memory if mapped from an anonymous file. */
1171 	if (vma->vm_flags & VM_SHARED) {
1172 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1173 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1174 			goto whole;
1175 		return 0;
1176 	}
1177 
1178 	/* Dump segments that have been written to.  */
1179 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1180 		goto whole;
1181 	if (vma->vm_file == NULL)
1182 		return 0;
1183 
1184 	if (FILTER(MAPPED_PRIVATE))
1185 		goto whole;
1186 
1187 	/*
1188 	 * If this looks like the beginning of a DSO or executable mapping,
1189 	 * check for an ELF header.  If we find one, dump the first page to
1190 	 * aid in determining what was mapped here.
1191 	 */
1192 	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1193 		u32 __user *header = (u32 __user *) vma->vm_start;
1194 		u32 word;
1195 		/*
1196 		 * Doing it this way gets the constant folded by GCC.
1197 		 */
1198 		union {
1199 			u32 cmp;
1200 			char elfmag[SELFMAG];
1201 		} magic;
1202 		BUILD_BUG_ON(SELFMAG != sizeof word);
1203 		magic.elfmag[EI_MAG0] = ELFMAG0;
1204 		magic.elfmag[EI_MAG1] = ELFMAG1;
1205 		magic.elfmag[EI_MAG2] = ELFMAG2;
1206 		magic.elfmag[EI_MAG3] = ELFMAG3;
1207 		if (get_user(word, header) == 0 && word == magic.cmp)
1208 			return PAGE_SIZE;
1209 	}
1210 
1211 #undef	FILTER
1212 
1213 	return 0;
1214 
1215 whole:
1216 	return vma->vm_end - vma->vm_start;
1217 }
1218 
1219 /* An ELF note in memory */
1220 struct memelfnote
1221 {
1222 	const char *name;
1223 	int type;
1224 	unsigned int datasz;
1225 	void *data;
1226 };
1227 
1228 static int notesize(struct memelfnote *en)
1229 {
1230 	int sz;
1231 
1232 	sz = sizeof(struct elf_note);
1233 	sz += roundup(strlen(en->name) + 1, 4);
1234 	sz += roundup(en->datasz, 4);
1235 
1236 	return sz;
1237 }
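/*
 * Example: a note named "CORE" occupies sizeof(struct elf_note) (three
 * 32-bit words, 12 bytes) + the name "CORE\0" padded up to 8 bytes + the
 * descriptor data padded up to a 4-byte boundary.
 */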
1238 
1239 #define DUMP_WRITE(addr, nr, foffset)	\
1240 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1241 
1242 static int alignfile(struct file *file, loff_t *foffset)
1243 {
1244 	static const char buf[4] = { 0, };
1245 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1246 	return 1;
1247 }
1248 
1249 static int writenote(struct memelfnote *men, struct file *file,
1250 			loff_t *foffset)
1251 {
1252 	struct elf_note en;
1253 	en.n_namesz = strlen(men->name) + 1;
1254 	en.n_descsz = men->datasz;
1255 	en.n_type = men->type;
1256 
1257 	DUMP_WRITE(&en, sizeof(en), foffset);
1258 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1259 	if (!alignfile(file, foffset))
1260 		return 0;
1261 	DUMP_WRITE(men->data, men->datasz, foffset);
1262 	if (!alignfile(file, foffset))
1263 		return 0;
1264 
1265 	return 1;
1266 }
1267 #undef DUMP_WRITE
1268 
1269 #define DUMP_WRITE(addr, nr)	\
1270 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1271 		goto end_coredump;
1272 #define DUMP_SEEK(off)	\
1273 	if (!dump_seek(file, (off))) \
1274 		goto end_coredump;
1275 
1276 static void fill_elf_header(struct elfhdr *elf, int segs,
1277 			    u16 machine, u32 flags, u8 osabi)
1278 {
1279 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1280 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1281 	elf->e_ident[EI_DATA] = ELF_DATA;
1282 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1283 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1284 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1285 
1286 	elf->e_type = ET_CORE;
1287 	elf->e_machine = machine;
1288 	elf->e_version = EV_CURRENT;
1289 	elf->e_entry = 0;
1290 	elf->e_phoff = sizeof(struct elfhdr);
1291 	elf->e_shoff = 0;
1292 	elf->e_flags = flags;
1293 	elf->e_ehsize = sizeof(struct elfhdr);
1294 	elf->e_phentsize = sizeof(struct elf_phdr);
1295 	elf->e_phnum = segs;
1296 	elf->e_shentsize = 0;
1297 	elf->e_shnum = 0;
1298 	elf->e_shstrndx = 0;
1299 	return;
1300 }
1301 
1302 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1303 {
1304 	phdr->p_type = PT_NOTE;
1305 	phdr->p_offset = offset;
1306 	phdr->p_vaddr = 0;
1307 	phdr->p_paddr = 0;
1308 	phdr->p_filesz = sz;
1309 	phdr->p_memsz = 0;
1310 	phdr->p_flags = 0;
1311 	phdr->p_align = 0;
1312 	return;
1313 }
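/*
 * In outline, the core file assembled by elf_core_dump() below is laid
 * out as: the ELF header, the program header table (the PT_NOTE header
 * filled in here plus one PT_LOAD header per vma and any arch-specific
 * extras), the note data itself, and finally the dumped vma contents
 * starting at a page-aligned data offset.
 */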
1314 
1315 static void fill_note(struct memelfnote *note, const char *name, int type,
1316 		unsigned int sz, void *data)
1317 {
1318 	note->name = name;
1319 	note->type = type;
1320 	note->datasz = sz;
1321 	note->data = data;
1322 	return;
1323 }
1324 
1325 /*
1326  * fill up all the fields in prstatus from the given task struct, except
1327  * registers which need to be filled up separately.
1328  */
1329 static void fill_prstatus(struct elf_prstatus *prstatus,
1330 		struct task_struct *p, long signr)
1331 {
1332 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1333 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1334 	prstatus->pr_sighold = p->blocked.sig[0];
1335 	prstatus->pr_pid = task_pid_vnr(p);
1336 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1337 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1338 	prstatus->pr_sid = task_session_vnr(p);
1339 	if (thread_group_leader(p)) {
1340 		/*
1341 		 * This is the record for the group leader.  Add in the
1342 		 * cumulative times of previous dead threads.  This total
1343 		 * won't include the time of each live thread whose state
1344 		 * is included in the core dump.  The final total reported
1345 		 * to our parent process when it calls wait4 will include
1346 		 * those sums as well as the little bit more time it takes
1347 		 * this and each other thread to finish dying after the
1348 		 * core dump synchronization phase.
1349 		 */
1350 		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1351 				   &prstatus->pr_utime);
1352 		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1353 				   &prstatus->pr_stime);
1354 	} else {
1355 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1356 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1357 	}
1358 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1359 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1360 }
1361 
1362 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1363 		       struct mm_struct *mm)
1364 {
1365 	unsigned int i, len;
1366 
1367 	/* first copy the parameters from user space */
1368 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1369 
1370 	len = mm->arg_end - mm->arg_start;
1371 	if (len >= ELF_PRARGSZ)
1372 		len = ELF_PRARGSZ-1;
1373 	if (copy_from_user(&psinfo->pr_psargs,
1374 		           (const char __user *)mm->arg_start, len))
1375 		return -EFAULT;
1376 	for(i = 0; i < len; i++)
1377 		if (psinfo->pr_psargs[i] == 0)
1378 			psinfo->pr_psargs[i] = ' ';
1379 	psinfo->pr_psargs[len] = 0;
1380 
1381 	psinfo->pr_pid = task_pid_vnr(p);
1382 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1383 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1384 	psinfo->pr_sid = task_session_vnr(p);
1385 
1386 	i = p->state ? ffz(~p->state) + 1 : 0;
1387 	psinfo->pr_state = i;
1388 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1389 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1390 	psinfo->pr_nice = task_nice(p);
1391 	psinfo->pr_flag = p->flags;
1392 	SET_UID(psinfo->pr_uid, p->uid);
1393 	SET_GID(psinfo->pr_gid, p->gid);
1394 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1395 
1396 	return 0;
1397 }
1398 
1399 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1400 {
1401 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1402 	int i = 0;
1403 	do
1404 		i += 2;
1405 	while (auxv[i - 2] != AT_NULL);
1406 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1407 }
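/*
 * current->mm->saved_auxv was populated by create_elf_tables() at exec
 * time; the loop above counts elf_addr_t words up to and including the
 * terminating AT_NULL pair, so the whole vector is emitted verbatim as
 * the NT_AUXV note.
 */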
1408 
1409 #ifdef CORE_DUMP_USE_REGSET
1410 #include <linux/regset.h>
1411 
1412 struct elf_thread_core_info {
1413 	struct elf_thread_core_info *next;
1414 	struct task_struct *task;
1415 	struct elf_prstatus prstatus;
1416 	struct memelfnote notes[0];
1417 };
1418 
1419 struct elf_note_info {
1420 	struct elf_thread_core_info *thread;
1421 	struct memelfnote psinfo;
1422 	struct memelfnote auxv;
1423 	size_t size;
1424 	int thread_notes;
1425 };
1426 
1427 /*
1428  * When a regset has a writeback hook, we call it on each thread before
1429  * dumping user memory.  On register window machines, this makes sure the
1430  * user memory backing the register data is up to date before we read it.
1431  */
1432 static void do_thread_regset_writeback(struct task_struct *task,
1433 				       const struct user_regset *regset)
1434 {
1435 	if (regset->writeback)
1436 		regset->writeback(task, regset, 1);
1437 }
1438 
1439 static int fill_thread_core_info(struct elf_thread_core_info *t,
1440 				 const struct user_regset_view *view,
1441 				 long signr, size_t *total)
1442 {
1443 	unsigned int i;
1444 
1445 	/*
1446 	 * NT_PRSTATUS is the one special case, because the regset data
1447 	 * goes into the pr_reg field inside the note contents, rather
1448 	 * than being the whole note contents.  We fill the rest in here.
1449 	 * We assume that regset 0 is NT_PRSTATUS.
1450 	 */
1451 	fill_prstatus(&t->prstatus, t->task, signr);
1452 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1453 				    0, sizeof(t->prstatus.pr_reg),
1454 				    &t->prstatus.pr_reg, NULL);
1455 
1456 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1457 		  sizeof(t->prstatus), &t->prstatus);
1458 	*total += notesize(&t->notes[0]);
1459 
1460 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1461 
1462 	/*
1463 	 * Each other regset might generate a note too.  For each regset
1464 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1465 	 * all zero and we'll know to skip writing it later.
1466 	 */
1467 	for (i = 1; i < view->n; ++i) {
1468 		const struct user_regset *regset = &view->regsets[i];
1469 		do_thread_regset_writeback(t->task, regset);
1470 		if (regset->core_note_type &&
1471 		    (!regset->active || regset->active(t->task, regset))) {
1472 			int ret;
1473 			size_t size = regset->n * regset->size;
1474 			void *data = kmalloc(size, GFP_KERNEL);
1475 			if (unlikely(!data))
1476 				return 0;
1477 			ret = regset->get(t->task, regset,
1478 					  0, size, data, NULL);
1479 			if (unlikely(ret))
1480 				kfree(data);
1481 			else {
1482 				if (regset->core_note_type != NT_PRFPREG)
1483 					fill_note(&t->notes[i], "LINUX",
1484 						  regset->core_note_type,
1485 						  size, data);
1486 				else {
1487 					t->prstatus.pr_fpvalid = 1;
1488 					fill_note(&t->notes[i], "CORE",
1489 						  NT_PRFPREG, size, data);
1490 				}
1491 				*total += notesize(&t->notes[i]);
1492 			}
1493 		}
1494 	}
1495 
1496 	return 1;
1497 }
1498 
1499 static int fill_note_info(struct elfhdr *elf, int phdrs,
1500 			  struct elf_note_info *info,
1501 			  long signr, struct pt_regs *regs)
1502 {
1503 	struct task_struct *dump_task = current;
1504 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1505 	struct elf_thread_core_info *t;
1506 	struct elf_prpsinfo *psinfo;
1507 	struct task_struct *g, *p;
1508 	unsigned int i;
1509 
1510 	info->size = 0;
1511 	info->thread = NULL;
1512 
1513 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1514 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1515 
1516 	if (psinfo == NULL)
1517 		return 0;
1518 
1519 	/*
1520 	 * Figure out how many notes we're going to need for each thread.
1521 	 */
1522 	info->thread_notes = 0;
1523 	for (i = 0; i < view->n; ++i)
1524 		if (view->regsets[i].core_note_type != 0)
1525 			++info->thread_notes;
1526 
1527 	/*
1528 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1529 	 * since it is our one special case.
1530 	 */
1531 	if (unlikely(info->thread_notes == 0) ||
1532 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1533 		WARN_ON(1);
1534 		return 0;
1535 	}
1536 
1537 	/*
1538 	 * Initialize the ELF file header.
1539 	 */
1540 	fill_elf_header(elf, phdrs,
1541 			view->e_machine, view->e_flags, view->ei_osabi);
1542 
1543 	/*
1544 	 * Allocate a structure for each thread.
1545 	 */
1546 	rcu_read_lock();
1547 	do_each_thread(g, p)
1548 		if (p->mm == dump_task->mm) {
1549 			t = kzalloc(offsetof(struct elf_thread_core_info,
1550 					     notes[info->thread_notes]),
1551 				    GFP_ATOMIC);
1552 			if (unlikely(!t)) {
1553 				rcu_read_unlock();
1554 				return 0;
1555 			}
1556 			t->task = p;
1557 			if (p == dump_task || !info->thread) {
1558 				t->next = info->thread;
1559 				info->thread = t;
1560 			} else {
1561 				/*
1562 				 * Make sure to keep the original task at
1563 				 * the head of the list.
1564 				 */
1565 				t->next = info->thread->next;
1566 				info->thread->next = t;
1567 			}
1568 		}
1569 	while_each_thread(g, p);
1570 	rcu_read_unlock();
1571 
1572 	/*
1573 	 * Now fill in each thread's information.
1574 	 */
1575 	for (t = info->thread; t != NULL; t = t->next)
1576 		if (!fill_thread_core_info(t, view, signr, &info->size))
1577 			return 0;
1578 
1579 	/*
1580 	 * Fill in the two process-wide notes.
1581 	 */
1582 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1583 	info->size += notesize(&info->psinfo);
1584 
1585 	fill_auxv_note(&info->auxv, current->mm);
1586 	info->size += notesize(&info->auxv);
1587 
1588 	return 1;
1589 }
1590 
1591 static size_t get_note_info_size(struct elf_note_info *info)
1592 {
1593 	return info->size;
1594 }
1595 
1596 /*
1597  * Write all the notes for each thread.  When writing the first thread, the
1598  * process-wide notes are interleaved after the first thread-specific note.
1599  */
1600 static int write_note_info(struct elf_note_info *info,
1601 			   struct file *file, loff_t *foffset)
1602 {
1603 	bool first = 1;
1604 	struct elf_thread_core_info *t = info->thread;
1605 
1606 	do {
1607 		int i;
1608 
1609 		if (!writenote(&t->notes[0], file, foffset))
1610 			return 0;
1611 
1612 		if (first && !writenote(&info->psinfo, file, foffset))
1613 			return 0;
1614 		if (first && !writenote(&info->auxv, file, foffset))
1615 			return 0;
1616 
1617 		for (i = 1; i < info->thread_notes; ++i)
1618 			if (t->notes[i].data &&
1619 			    !writenote(&t->notes[i], file, foffset))
1620 				return 0;
1621 
1622 		first = 0;
1623 		t = t->next;
1624 	} while (t);
1625 
1626 	return 1;
1627 }
1628 
1629 static void free_note_info(struct elf_note_info *info)
1630 {
1631 	struct elf_thread_core_info *threads = info->thread;
1632 	while (threads) {
1633 		unsigned int i;
1634 		struct elf_thread_core_info *t = threads;
1635 		threads = t->next;
1636 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1637 		for (i = 1; i < info->thread_notes; ++i)
1638 			kfree(t->notes[i].data);
1639 		kfree(t);
1640 	}
1641 	kfree(info->psinfo.data);
1642 }
1643 
1644 #else
1645 
1646 /* Here is the structure in which status of each thread is captured. */
1647 struct elf_thread_status
1648 {
1649 	struct list_head list;
1650 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1651 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1652 	struct task_struct *thread;
1653 #ifdef ELF_CORE_COPY_XFPREGS
1654 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1655 #endif
1656 	struct memelfnote notes[3];
1657 	int num_notes;
1658 };
1659 
1660 /*
1661  * In order to add the specific thread information for the elf file format,
1662  * we need to keep a linked list of every thread's pr_status and then create
1663  * a single section for them in the final core file.
1664  */
1665 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1666 {
1667 	int sz = 0;
1668 	struct task_struct *p = t->thread;
1669 	t->num_notes = 0;
1670 
1671 	fill_prstatus(&t->prstatus, p, signr);
1672 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1673 
1674 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1675 		  &(t->prstatus));
1676 	t->num_notes++;
1677 	sz += notesize(&t->notes[0]);
1678 
1679 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1680 								&t->fpu))) {
1681 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1682 			  &(t->fpu));
1683 		t->num_notes++;
1684 		sz += notesize(&t->notes[1]);
1685 	}
1686 
1687 #ifdef ELF_CORE_COPY_XFPREGS
1688 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1689 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1690 			  sizeof(t->xfpu), &t->xfpu);
1691 		t->num_notes++;
1692 		sz += notesize(&t->notes[2]);
1693 	}
1694 #endif
1695 	return sz;
1696 }
1697 
1698 struct elf_note_info {
1699 	struct memelfnote *notes;
1700 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1701 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1702 	struct list_head thread_list;
1703 	elf_fpregset_t *fpu;
1704 #ifdef ELF_CORE_COPY_XFPREGS
1705 	elf_fpxregset_t *xfpu;
1706 #endif
1707 	int thread_status_size;
1708 	int numnote;
1709 };
1710 
1711 static int fill_note_info(struct elfhdr *elf, int phdrs,
1712 			  struct elf_note_info *info,
1713 			  long signr, struct pt_regs *regs)
1714 {
1715 #define	NUM_NOTES	6
1716 	struct list_head *t;
1717 	struct task_struct *g, *p;
1718 
1719 	info->notes = NULL;
1720 	info->prstatus = NULL;
1721 	info->psinfo = NULL;
1722 	info->fpu = NULL;
1723 #ifdef ELF_CORE_COPY_XFPREGS
1724 	info->xfpu = NULL;
1725 #endif
1726 	INIT_LIST_HEAD(&info->thread_list);
1727 
1728 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1729 			      GFP_KERNEL);
1730 	if (!info->notes)
1731 		return 0;
1732 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1733 	if (!info->psinfo)
1734 		return 0;
1735 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1736 	if (!info->prstatus)
1737 		return 0;
1738 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1739 	if (!info->fpu)
1740 		return 0;
1741 #ifdef ELF_CORE_COPY_XFPREGS
1742 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1743 	if (!info->xfpu)
1744 		return 0;
1745 #endif
1746 
1747 	info->thread_status_size = 0;
1748 	if (signr) {
1749 		struct elf_thread_status *tmp;
1750 		rcu_read_lock();
1751 		do_each_thread(g, p)
1752 			if (current->mm == p->mm && current != p) {
1753 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1754 				if (!tmp) {
1755 					rcu_read_unlock();
1756 					return 0;
1757 				}
1758 				tmp->thread = p;
1759 				list_add(&tmp->list, &info->thread_list);
1760 			}
1761 		while_each_thread(g, p);
1762 		rcu_read_unlock();
1763 		list_for_each(t, &info->thread_list) {
1764 			struct elf_thread_status *tmp;
1765 			int sz;
1766 
1767 			tmp = list_entry(t, struct elf_thread_status, list);
1768 			sz = elf_dump_thread_status(signr, tmp);
1769 			info->thread_status_size += sz;
1770 		}
1771 	}
1772 	/* now collect the dump for the current */
1773 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1774 	fill_prstatus(info->prstatus, current, signr);
1775 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1776 
1777 	/* Set up header */
1778 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1779 
1780 	/*
1781 	 * Set up the notes in similar form to SVR4 core dumps made
1782 	 * with info from their /proc.
1783 	 */
1784 
1785 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1786 		  sizeof(*info->prstatus), info->prstatus);
1787 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1788 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1789 		  sizeof(*info->psinfo), info->psinfo);
1790 
1791 	info->numnote = 2;
1792 
1793 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1794 
1795 	/* Try to dump the FPU. */
1796 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1797 							       info->fpu);
1798 	if (info->prstatus->pr_fpvalid)
1799 		fill_note(info->notes + info->numnote++,
1800 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1801 #ifdef ELF_CORE_COPY_XFPREGS
1802 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1803 		fill_note(info->notes + info->numnote++,
1804 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1805 			  sizeof(*info->xfpu), info->xfpu);
1806 #endif
1807 
1808 	return 1;
1809 
1810 #undef NUM_NOTES
1811 }
1812 
1813 static size_t get_note_info_size(struct elf_note_info *info)
1814 {
1815 	int sz = 0;
1816 	int i;
1817 
1818 	for (i = 0; i < info->numnote; i++)
1819 		sz += notesize(info->notes + i);
1820 
1821 	sz += info->thread_status_size;
1822 
1823 	return sz;
1824 }
1825 
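/*
 * Emit the main notes followed by each collected thread's status notes,
 * in the same order get_note_info_size() accounted for them.
 */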
1826 static int write_note_info(struct elf_note_info *info,
1827 			   struct file *file, loff_t *foffset)
1828 {
1829 	int i;
1830 	struct list_head *t;
1831 
1832 	for (i = 0; i < info->numnote; i++)
1833 		if (!writenote(info->notes + i, file, foffset))
1834 			return 0;
1835 
1836 	/* write out the thread status notes section */
1837 	list_for_each(t, &info->thread_list) {
1838 		struct elf_thread_status *tmp =
1839 				list_entry(t, struct elf_thread_status, list);
1840 
1841 		for (i = 0; i < tmp->num_notes; i++)
1842 			if (!writenote(&tmp->notes[i], file, foffset))
1843 				return 0;
1844 	}
1845 
1846 	return 1;
1847 }
1848 
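/*
 * Undo fill_note_info(): free the per-thread status records and the note
 * buffers.  Safe on a partially filled elf_note_info, since fill_note_info()
 * clears the pointers and initializes the list before anything can fail.
 */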
1849 static void free_note_info(struct elf_note_info *info)
1850 {
1851 	while (!list_empty(&info->thread_list)) {
1852 		struct list_head *tmp = info->thread_list.next;
1853 		list_del(tmp);
1854 		kfree(list_entry(tmp, struct elf_thread_status, list));
1855 	}
1856 
1857 	kfree(info->prstatus);
1858 	kfree(info->psinfo);
1859 	kfree(info->notes);
1860 	kfree(info->fpu);
1861 #ifdef ELF_CORE_COPY_XFPREGS
1862 	kfree(info->xfpu);
1863 #endif
1864 }
1865 
1866 #endif
1867 
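/*
 * First vma to dump: the head of the mm's vma list, or the gate vma
 * (e.g. the vsyscall page) if the process has no other mappings.
 */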
1868 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1869 					struct vm_area_struct *gate_vma)
1870 {
1871 	struct vm_area_struct *ret = tsk->mm->mmap;
1872 
1873 	if (ret)
1874 		return ret;
1875 	return gate_vma;
1876 }
1877 /*
1878  * Helper function for iterating across a vma list.  It ensures that the caller
1879  * will visit `gate_vma' prior to terminating the search.
1880  */
1881 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1882 					struct vm_area_struct *gate_vma)
1883 {
1884 	struct vm_area_struct *ret;
1885 
1886 	ret = this_vma->vm_next;
1887 	if (ret)
1888 		return ret;
1889 	if (this_vma == gate_vma)
1890 		return NULL;
1891 	return gate_vma;
1892 }
1893 
1894 /*
1895  * Actual dumper
1896  *
1897  * This is a two-pass process; first we find the offsets of the bits,
1898  * and then they are actually written out.  If we exceed the core size
1899  * limit we just truncate.
1900  */
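/*
 * Rough layout of the resulting core file:
 *
 *	ELF header
 *	program headers: one PT_NOTE plus one PT_LOAD per vma
 *			 (and any ELF_CORE_EXTRA_PHDRS)
 *	note data
 *	padding up to the next ELF_EXEC_PAGESIZE boundary
 *	PT_LOAD contents: vma_dump_size() bytes per vma, page by page
 */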
1901 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1902 {
1903 	int has_dumped = 0;
1904 	mm_segment_t fs;
1905 	int segs;
1906 	size_t size = 0;
1907 	struct vm_area_struct *vma, *gate_vma;
1908 	struct elfhdr *elf = NULL;
1909 	loff_t offset = 0, dataoff, foffset;
1910 	unsigned long mm_flags;
1911 	struct elf_note_info info;
1912 
1913 	/*
1914 	 * We no longer stop all VM operations.
1915 	 *
1916 	 * This is because any process that could possibly change map_count
1917 	 * or the mmap / vma pages is now blocked in do_exit until the current
1918 	 * task finishes this core dump.
1919 	 *
1920 	 * Only ptrace can touch these memory addresses, but it doesn't change
1921 	 * the map_count or the pages allocated. So no possibility of crashing
1922 	 * exists while dumping the mm->vm_next areas to the core file.
1923 	 */
1924 
1925 	/* alloc memory for large data structures: too large to be on stack */
1926 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1927 	if (!elf)
1928 		return 0;	/* info is not yet initialized; nothing to free */
1929 
1930 	segs = current->mm->map_count;
1931 #ifdef ELF_CORE_EXTRA_PHDRS
1932 	segs += ELF_CORE_EXTRA_PHDRS;
1933 #endif
1934 
1935 	gate_vma = get_gate_vma(current);
1936 	if (gate_vma != NULL)
1937 		segs++;
1938 
1939 	/*
1940 	 * Collect all the non-memory information about the process for the
1941 	 * notes.  This also sets up the file header.
1942 	 */
1943 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1944 			    &info, signr, regs))
1945 		goto cleanup;
1946 
1947 	has_dumped = 1;
1948 	current->flags |= PF_DUMPCORE;
1949 
1950 	fs = get_fs();
1951 	set_fs(KERNEL_DS);
1952 
1953 	DUMP_WRITE(elf, sizeof(*elf));
1954 	offset += sizeof(*elf);				/* Elf header */
1955 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1956 	foffset = offset;
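	/*
	 * From here on, "offset" tracks where each piece will sit in the
	 * finished file, while "foffset" follows what has actually been
	 * written, so the data area can be reached later with a relative
	 * DUMP_SEEK(dataoff - foffset).
	 */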
1957 
1958 	/* Write notes phdr entry */
1959 	{
1960 		struct elf_phdr phdr;
1961 		size_t sz = get_note_info_size(&info);
1962 
1963 		sz += elf_coredump_extra_notes_size();
1964 
1965 		fill_elf_note_phdr(&phdr, sz, offset);
1966 		offset += sz;
1967 		DUMP_WRITE(&phdr, sizeof(phdr));
1968 	}
1969 
1970 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1971 
1972 	/*
1973 	 * We must use the same mm->flags while dumping core to avoid
1974 	 * inconsistency between the program headers and bodies, otherwise an
1975 	 * unusable core file can be generated.
1976 	 */
1977 	mm_flags = current->mm->flags;
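	/*
	 * mm_flags carries the coredump filter bits (MMF_DUMP_*) that
	 * vma_dump_size() consults; the snapshot is used for both the
	 * header pass and the data pass below.
	 */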
1978 
1979 	/* Write program headers for segments dump */
1980 	for (vma = first_vma(current, gate_vma); vma != NULL;
1981 			vma = next_vma(vma, gate_vma)) {
1982 		struct elf_phdr phdr;
1983 
1984 		phdr.p_type = PT_LOAD;
1985 		phdr.p_offset = offset;
1986 		phdr.p_vaddr = vma->vm_start;
1987 		phdr.p_paddr = 0;
1988 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1989 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1990 		offset += phdr.p_filesz;
1991 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1992 		if (vma->vm_flags & VM_WRITE)
1993 			phdr.p_flags |= PF_W;
1994 		if (vma->vm_flags & VM_EXEC)
1995 			phdr.p_flags |= PF_X;
1996 		phdr.p_align = ELF_EXEC_PAGESIZE;
1997 
1998 		DUMP_WRITE(&phdr, sizeof(phdr));
1999 	}
2000 
2001 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2002 	ELF_CORE_WRITE_EXTRA_PHDRS;
2003 #endif
2004 
2005 	/* write out the notes section */
2006 	if (!write_note_info(&info, file, &foffset))
2007 		goto end_coredump;
2008 
2009 	if (elf_coredump_extra_notes_write(file, &foffset))
2010 		goto end_coredump;
2011 
2012 	/* Align to page */
2013 	DUMP_SEEK(dataoff - foffset);
2014 
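	/*
	 * Second pass: write the memory contents.  Each dumpable page is
	 * pinned with get_user_pages() and copied out through a temporary
	 * kernel mapping; unmapped holes and the zero page become seeks so
	 * the core file stays sparse.  The inner "vma" returned by
	 * get_user_pages() shadows the iteration variable and is only used
	 * for the cache flush.  Exceeding "limit" aborts the dump here,
	 * truncating the file.
	 */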
2015 	for (vma = first_vma(current, gate_vma); vma != NULL;
2016 			vma = next_vma(vma, gate_vma)) {
2017 		unsigned long addr;
2018 		unsigned long end;
2019 
2020 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2021 
2022 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2023 			struct page *page;
2024 			struct vm_area_struct *vma;
2025 
2026 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2027 						&page, &vma) <= 0) {
2028 				DUMP_SEEK(PAGE_SIZE);
2029 			} else {
2030 				if (page == ZERO_PAGE(0)) {
2031 					if (!dump_seek(file, PAGE_SIZE)) {
2032 						page_cache_release(page);
2033 						goto end_coredump;
2034 					}
2035 				} else {
2036 					void *kaddr;
2037 					flush_cache_page(vma, addr,
2038 							 page_to_pfn(page));
2039 					kaddr = kmap(page);
2040 					if ((size += PAGE_SIZE) > limit ||
2041 					    !dump_write(file, kaddr,
2042 					    PAGE_SIZE)) {
2043 						kunmap(page);
2044 						page_cache_release(page);
2045 						goto end_coredump;
2046 					}
2047 					kunmap(page);
2048 				}
2049 				page_cache_release(page);
2050 			}
2051 		}
2052 	}
2053 
2054 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2055 	ELF_CORE_WRITE_EXTRA_DATA;
2056 #endif
2057 
2058 end_coredump:
2059 	set_fs(fs);
2060 
2061 cleanup:
2062 	kfree(elf);
2063 	free_note_info(&info);
2064 	return has_dumped;
2065 }
2066 
2067 #endif		/* USE_ELF_CORE_DUMP */
2068 
2069 static int __init init_elf_binfmt(void)
2070 {
2071 	return register_binfmt(&elf_format);
2072 }
2073 
2074 static void __exit exit_elf_binfmt(void)
2075 {
2076 	/* Remove the ELF loader. */
2077 	unregister_binfmt(&elf_format);
2078 }
2079 
2080 core_initcall(init_elf_binfmt);
2081 module_exit(exit_elf_binfmt);
2082 MODULE_LICENSE("GPL");
2083