/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);

#ifndef elf_addr_t
#define elf_addr_t unsigned long
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
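/*
 * Worked example (illustrative, assuming ELF_MIN_ALIGN == 4096):
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000	(round down to page)
 *	ELF_PAGEOFFSET(0x08048123) == 0x123		(offset within page)
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000	(round up to page)
 */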

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
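/*
 * Note: because the comparison is done on an unsigned value, BAD_ADDR()
 * catches both addresses beyond the user address space and negative
 * error codes (e.g. -ENOMEM from do_mmap) that were cast to unsigned
 * long, since those wrap to values far above TASK_SIZE.
 */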

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   contain junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
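/*
 * Example (illustrative, assuming ELF_MIN_ALIGN == 4096): for
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET() yields 0x123, so padzero()
 * clears the remaining 0xedd bytes of that page, i.e. the user
 * addresses 0x0804a123..0x0804afff.
 */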

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
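/*
 * Sketch of how these compose on a conventional downward-growing stack:
 * STACK_ALLOC(sp, len) carves out len bytes and leaves sp at the new,
 * lower address; STACK_ADD(sp, items) makes room for 'items' elf_addr_t
 * slots; STACK_ROUND(sp, items) then aligns the result down to a
 * 16-byte boundary, as most ABIs require of the initial stack pointer.
 */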

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
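/*
 * Each NEW_AUX_ENT() appends one {a_type, a_val} pair, so after the
 * entries below saved_auxv looks roughly like this (values illustrative):
 *
 *	elf_info[0] = AT_HWCAP;   elf_info[1] = <hwcap bits>;
 *	elf_info[2] = AT_PAGESZ;  elf_info[3] = 4096;
 *	...
 *	elf_info[n] = AT_NULL;    elf_info[n+1] = 0;	(terminator)
 */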

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		__put_user((elf_addr_t)(unsigned long)argv, sp++);
		__put_user((elf_addr_t)(unsigned long)envp, sp++);
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, argv++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, envp++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
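/*
 * The initial userspace stack built above ends up laid out roughly like
 * this (lowest address first; sketch for the ELF-interpreter case):
 *
 *	argc
 *	argv[0] ... argv[argc-1], NULL
 *	envp[0] ... envp[envc-1], NULL
 *	auxv pairs, terminated by an AT_NULL pair
 *	argument/environment strings and the platform string (higher up)
 */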

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}
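/*
 * Worked example (illustrative): for a segment with p_vaddr == 0x08048100
 * and p_offset == 0x2100, pageoffset is 0x100, so the mapping starts at
 * ELF_PAGESTART(addr), covers p_filesz + 0x100 bytes, and uses file
 * offset 0x2000; both the address and the file offset are thus page
 * aligned, as mmap requires for file-backed mappings.
 */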

#endif /* !elf_map */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
	                   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
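/*
 * Example: with 4K pages and the default STACK_RND_MASK of 0x7ff, the
 * random offset is (get_random_int() & 0x7ff) << 12, i.e. 0..8MB-4K in
 * page-sized steps, subtracted from (or, with CONFIG_STACK_GROWSUP,
 * added to) the page-aligned stack top.
 */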

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NUL-terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;
			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}
	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, 1) != off)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
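/*
 * On-disk layout of one ELF note, as accounted for above and written by
 * writenote() below: the elf_note header, then the name padded to a
 * 4-byte boundary, then the descriptor data, likewise padded:
 *
 *	n_namesz  n_descsz  n_type  "name\0" pad...  desc... pad...
 */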

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while (0)

static int alignfile(struct file *file, loff_t *foffset)
{
	char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;
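/*
 * Unlike the three-argument DUMP_WRITE above (now #undef'd), these two
 * macros expand references to the 'size', 'limit' and 'file' locals of
 * elf_core_dump() and bail out through its end_coredump label, so they
 * are only usable inside that function.
 */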

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = p->signal->session;
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = p->signal->session;

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current thread */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	foffset = offset;

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
		foffset += sizeof(phdr);
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK(PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");