xref: /openbmc/linux/fs/binfmt_elf.c (revision c21b37f6)
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

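/*
 * Extend the brk area to cover [start, end), page-aligning both ends,
 * and record the new break in the mm.  Returns 0 on success or the
 * do_brk() error.
 */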
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These pages would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) & ~15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif

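/*
 * create_elf_tables() lays out the initial stack image seen by the new
 * program: argc, the argv[] and envp[] pointer arrays (each NULL
 * terminated), and the ELF auxiliary vector.  The argument and
 * environment strings themselves were copied to the stack earlier by
 * the exec core; here we only write the pointers to them.
 */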
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;
	struct vm_area_struct *vma;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
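	/*
	 * Each NEW_AUX_ENT() emits one (id, value) pair of the auxiliary
	 * vector; the vector ends with an AT_NULL pair and is consumed by
	 * the dynamic linker and C library at startup.
	 */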

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

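/*
 * Map one loadable segment, applying the ELF page rounding to the
 * address, length and file offset.  An architecture can supply its own
 * elf_map() by defining a macro of the same name, which suppresses this
 * generic version via the #ifndef below.
 */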
#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}
			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

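/*
 * Load an a.out format interpreter (OMAGIC/ZMAGIC/QMAGIC) and return
 * its entry point, or ~0UL on failure.  This path survives so that old
 * a.out dynamic linkers can still be used with ELF executables.
 */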
static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

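/*
 * When stack randomization is enabled for this task, shift the stack
 * top by up to STACK_RND_MASK pages of random offset: downward on
 * normal stacks, upward when CONFIG_STACK_GROWSUP.
 */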
static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, this is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that has an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

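/*
 * Advance the core file position by `off' bytes: seek when the file
 * supports it, otherwise write that many zero bytes (e.g. when dumping
 * to a pipe).  Returns 1 on success, 0 on failure.
 */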
static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zero page on a short write */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much, etc.).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		return 1;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0)
			return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
		else
			return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
	}

	/* By default, if it hasn't been written to, don't write it out. */
	if (!vma->anon_vma)
		return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);

	return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

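/*
 * On-disk size of a note: the elf_note header plus the name and the
 * payload, each rounded up to a 4-byte boundary.
 */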
static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

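/*
 * Emit one complete note record: the elf_note header, then the name,
 * then the payload, padding each piece out to 4-byte alignment.
 */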
static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE
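
/*
 * These variants are used inside elf_core_dump() itself: they give up
 * and jump to the end_coredump label when the core size limit is
 * exceeded or a write/seek fails.
 */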
#define DUMP_WRITE(addr, nr)	\
	do { \
		if ((size += (nr)) > limit || \
		    !dump_write(file, (addr), (nr))) \
			goto end_coredump; \
	} while (0)
#define DUMP_SEEK(off)	\
	do { \
		if (!dump_seek(file, (off))) \
			goto end_coredump; \
	} while (0)

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

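/*
 * Fill in the NT_PRPSINFO note: the argument string (fetched from the
 * dumping process's own user memory, with NULs turned into spaces),
 * ids, run state, nice value and command name.
 */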
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

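/*
 * Starting point for the vma walk in the dumper: the first real
 * mapping if one exists, otherwise the gate vma.
 */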
static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;
	unsigned long mm_flags;
#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	int extra_notes_size;
#endif

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current task */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

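	/* Count the auxv entries saved at exec time, including the
	   terminating AT_NULL pair, so the whole vector is dumped. */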
	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
		extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
		sz += extra_notes_size;
#endif

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/*
	 * We must use the same mm->flags while dumping core to avoid
	 * inconsistency between the program headers and bodies, otherwise an
	 * unusable core file can be generated.
	 */
	mm_flags = current->mm->flags;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	ELF_CORE_WRITE_EXTRA_NOTES;
	foffset += extra_notes_size;
#endif

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;

		if (!maydump(vma, mm_flags))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *page_vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &page_vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					if (!dump_seek(file, PAGE_SIZE)) {
						page_cache_release(page);
						goto end_coredump;
					}
				} else {
					void *kaddr;
					flush_cache_page(page_vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");