xref: /openbmc/linux/fs/binfmt_elf.c (revision 64c70b1c)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45 
46 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 static int load_elf_library(struct file *);
48 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
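
/*
 * Illustrative only, assuming ELF_MIN_ALIGN == 4096 (0x1000): the three
 * macros above decompose an address as follows.
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000	(round down)
 *	ELF_PAGEOFFSET(0x08048123) == 0x00000123	(offset within page)
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000	(round up)
 */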
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
84 
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  These pages would
103    otherwise contain junk from the file that should not
104    be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
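
/*
 * Illustrative only, with ELF_MIN_ALIGN == 4096: a bss starting at
 * elf_bss == 0x0804a123 leaves 0x1000 - 0x123 == 0xedd stale file bytes
 * in the final partial page, so padzero() clears user addresses
 * 0x0804a123 through 0x0804afff.
 */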
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
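
/*
 * Illustrative only, for the common downward-growing case with 32-bit
 * elf_addr_t and sp == 0xbffff010:
 *
 *	STACK_ADD(sp, 2)       == 0xbffff008	(room for two entries)
 *	STACK_ROUND(sp, 2)     == 0xbffff000	(16-byte aligned)
 *	STACK_ALLOC(sp, 0x10)  makes sp 0xbffff000 and yields that value
 *
 * Note that STACK_ADD and STACK_ROUND count elf_addr_t entries while
 * STACK_ALLOC takes a byte length; CONFIG_STACK_GROWSUP simply flips
 * the directions.
 */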
133 
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 		int interp_aout, unsigned long load_addr,
137 		unsigned long interp_load_addr)
138 {
139 	unsigned long p = bprm->p;
140 	int argc = bprm->argc;
141 	int envc = bprm->envc;
142 	elf_addr_t __user *argv;
143 	elf_addr_t __user *envp;
144 	elf_addr_t __user *sp;
145 	elf_addr_t __user *u_platform;
146 	const char *k_platform = ELF_PLATFORM;
147 	int items;
148 	elf_addr_t *elf_info;
149 	int ei_index = 0;
150 	struct task_struct *tsk = current;
151 
152 	/*
153 	 * If this architecture has a platform capability string, copy it
154 	 * to userspace.  In some cases (Sparc), this info is impossible
155 	 * for userspace to get any other way, in others (i386) it is
156 	 * merely difficult.
157 	 */
158 	u_platform = NULL;
159 	if (k_platform) {
160 		size_t len = strlen(k_platform) + 1;
161 
162 		/*
163 		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
164 		 * evictions by the processes running on the same package. One
165 		 * thing we can do is to shuffle the initial stack for them.
166 		 */
167 
168 		p = arch_align_stack(p);
169 
170 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171 		if (__copy_to_user(u_platform, k_platform, len))
172 			return -EFAULT;
173 	}
174 
175 	/* Create the ELF interpreter info */
176 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 #define NEW_AUX_ENT(id, val) \
178 	do { \
179 		elf_info[ei_index++] = id; \
180 		elf_info[ei_index++] = val; \
181 	} while (0)
182 
183 #ifdef ARCH_DLINFO
184 	/*
185 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
186 	 * AUXV.
187 	 */
188 	ARCH_DLINFO;
189 #endif
190 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
191 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
192 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
193 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
194 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
195 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
196 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
197 	NEW_AUX_ENT(AT_FLAGS, 0);
198 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
199 	NEW_AUX_ENT(AT_UID, tsk->uid);
200 	NEW_AUX_ENT(AT_EUID, tsk->euid);
201 	NEW_AUX_ENT(AT_GID, tsk->gid);
202 	NEW_AUX_ENT(AT_EGID, tsk->egid);
203  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
204 	if (k_platform) {
205 		NEW_AUX_ENT(AT_PLATFORM,
206 			    (elf_addr_t)(unsigned long)u_platform);
207 	}
208 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
209 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
210 	}
211 #undef NEW_AUX_ENT
212 	/* AT_NULL is zero; clear the rest too */
213 	memset(&elf_info[ei_index], 0,
214 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
215 
216 	/* And advance past the AT_NULL entry.  */
217 	ei_index += 2;
218 
219 	sp = STACK_ADD(p, ei_index);
220 
221 	items = (argc + 1) + (envc + 1);
222 	if (interp_aout) {
223 		items += 3; /* a.out interpreters require argv & envp too */
224 	} else {
225 		items += 1; /* ELF interpreters only put argc on the stack */
226 	}
227 	bprm->p = STACK_ROUND(sp, items);
228 
229 	/* Point sp at the lowest address on the stack */
230 #ifdef CONFIG_STACK_GROWSUP
231 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
232 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
233 #else
234 	sp = (elf_addr_t __user *)bprm->p;
235 #endif
236 
237 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
238 	if (__put_user(argc, sp++))
239 		return -EFAULT;
240 	if (interp_aout) {
241 		argv = sp + 2;
242 		envp = argv + argc + 1;
243 		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
244 		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
245 			return -EFAULT;
246 	} else {
247 		argv = sp;
248 		envp = argv + argc + 1;
249 	}
250 
251 	/* Populate argv and envp */
252 	p = current->mm->arg_end = current->mm->arg_start;
253 	while (argc-- > 0) {
254 		size_t len;
255 		if (__put_user((elf_addr_t)p, argv++))
256 			return -EFAULT;
257 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
258 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
259 			return -EINVAL;
260 		p += len;
261 	}
262 	if (__put_user(0, argv))
263 		return -EFAULT;
264 	current->mm->arg_end = current->mm->env_start = p;
265 	while (envc-- > 0) {
266 		size_t len;
267 		if (__put_user((elf_addr_t)p, envp++))
268 			return -EFAULT;
269 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 			return -EINVAL;
272 		p += len;
273 	}
274 	if (__put_user(0, envp))
275 		return -EFAULT;
276 	current->mm->env_end = p;
277 
278 	/* Put the elf_info on the stack in the right place.  */
279 	sp = (elf_addr_t __user *)envp + 1;
280 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 		return -EFAULT;
282 	return 0;
283 }
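
/*
 * The net result, sketched for a downward-growing stack (and readable
 * from userspace roughly like the snippet below; AT_NULL comes from
 * <elf.h> there):
 *
 *	sp ->	argc
 *		argv[0] ... argv[argc-1], NULL
 *		envp[0] ... envp[n-1], NULL
 *		(AT_*, value) auxv pairs ..., (AT_NULL, 0)
 *		platform string, argument and environment strings
 *
 *	int main(int argc, char **argv)
 *	{
 *		char **envp = argv + argc + 1;	skip argv[] and its NULL
 *		while (*envp++)
 *			;			skip envp[] up to its NULL
 *		unsigned long *auxv = (unsigned long *)envp;
 *		for (; auxv[0] != AT_NULL; auxv += 2)
 *			;	auxv[0] is the AT_* id, auxv[1] its value
 *		return 0;
 *	}
 *
 * The a.out-interpreter case additionally stores the argv and envp
 * pointers themselves right after argc.
 */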
284 
285 #ifndef elf_map
286 
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288 		struct elf_phdr *eppnt, int prot, int type)
289 {
290 	unsigned long map_addr;
291 	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
292 
293 	down_write(&current->mm->mmap_sem);
294 	/* mmap() will return -EINVAL if given a zero size, but a
295 	 * segment with zero filesize is perfectly valid */
296 	if (eppnt->p_filesz + pageoffset)
297 		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
298 				   eppnt->p_filesz + pageoffset, prot, type,
299 				   eppnt->p_offset - pageoffset);
300 	else
301 		map_addr = ELF_PAGESTART(addr);
302 	up_write(&current->mm->mmap_sem);
303 	return map_addr;
304 }
305 
306 #endif /* !elf_map */
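
/*
 * Illustrative only: for a phdr with p_vaddr == 0x08048100 and
 * p_offset == 0x2100 (ELF_MIN_ALIGN == 4096), pageoffset is 0x100, so
 * elf_map() asks do_mmap() for address 0x08048000, length
 * p_filesz + 0x100 and file offset 0x2000.  p_vaddr and p_offset must
 * therefore be congruent modulo the page size for the map to succeed.
 */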
307 
308 /* This is much more generalized than the library routine read function,
309    so we keep this separate.  Technically the library read function
310    is only provided so that we can read a.out libraries that have
311    an ELF header */
312 
313 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
314 		struct file *interpreter, unsigned long *interp_load_addr)
315 {
316 	struct elf_phdr *elf_phdata;
317 	struct elf_phdr *eppnt;
318 	unsigned long load_addr = 0;
319 	int load_addr_set = 0;
320 	unsigned long last_bss = 0, elf_bss = 0;
321 	unsigned long error = ~0UL;
322 	int retval, i, size;
323 
324 	/* First of all, some simple consistency checks */
325 	if (interp_elf_ex->e_type != ET_EXEC &&
326 	    interp_elf_ex->e_type != ET_DYN)
327 		goto out;
328 	if (!elf_check_arch(interp_elf_ex))
329 		goto out;
330 	if (!interpreter->f_op || !interpreter->f_op->mmap)
331 		goto out;
332 
333 	/*
334 	 * If the size of this structure has changed, then punt, since
335 	 * we will be doing the wrong thing.
336 	 */
337 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
338 		goto out;
339 	if (interp_elf_ex->e_phnum < 1 ||
340 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
341 		goto out;
342 
343 	/* Now read in all of the header information */
344 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
345 	if (size > ELF_MIN_ALIGN)
346 		goto out;
347 	elf_phdata = kmalloc(size, GFP_KERNEL);
348 	if (!elf_phdata)
349 		goto out;
350 
351 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
352 			     (char *)elf_phdata,size);
353 	error = -EIO;
354 	if (retval != size) {
355 		if (retval < 0)
356 			error = retval;
357 		goto out_close;
358 	}
359 
360 	eppnt = elf_phdata;
361 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
362 		if (eppnt->p_type == PT_LOAD) {
363 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
364 			int elf_prot = 0;
365 			unsigned long vaddr = 0;
366 			unsigned long k, map_addr;
367 
368 			if (eppnt->p_flags & PF_R)
369 				elf_prot = PROT_READ;
370 			if (eppnt->p_flags & PF_W)
371 				elf_prot |= PROT_WRITE;
372 			if (eppnt->p_flags & PF_X)
373 				elf_prot |= PROT_EXEC;
374 			vaddr = eppnt->p_vaddr;
375 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
376 				elf_type |= MAP_FIXED;
377 
378 			map_addr = elf_map(interpreter, load_addr + vaddr,
379 					   eppnt, elf_prot, elf_type);
380 			error = map_addr;
381 			if (BAD_ADDR(map_addr))
382 				goto out_close;
383 
384 			if (!load_addr_set &&
385 			    interp_elf_ex->e_type == ET_DYN) {
386 				load_addr = map_addr - ELF_PAGESTART(vaddr);
387 				load_addr_set = 1;
388 			}
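
			/*
			 * Illustrative only: if the first PT_LOAD has
			 * p_vaddr == 0x1000 and do_mmap() happened to place
			 * it at 0x40001000, load_addr becomes 0x40000000 and
			 * every later segment is mapped MAP_FIXED at
			 * load_addr + p_vaddr.
			 */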
389 
390 			/*
391 			 * Check to see if the section's size will overflow the
392 			 * allowed task size. Note that p_filesz must always be
393 			 * <= p_memsz so it's only necessary to check p_memsz.
394 			 */
395 			k = load_addr + eppnt->p_vaddr;
396 			if (BAD_ADDR(k) ||
397 			    eppnt->p_filesz > eppnt->p_memsz ||
398 			    eppnt->p_memsz > TASK_SIZE ||
399 			    TASK_SIZE - eppnt->p_memsz < k) {
400 				error = -ENOMEM;
401 				goto out_close;
402 			}
403 
404 			/*
405 			 * Find the end of the file mapping for this phdr, and
406 			 * keep track of the largest address we see for this.
407 			 */
408 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
409 			if (k > elf_bss)
410 				elf_bss = k;
411 
412 			/*
413 			 * Do the same thing for the memory mapping - between
414 			 * elf_bss and last_bss is the bss section.
415 			 */
416 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
417 			if (k > last_bss)
418 				last_bss = k;
419 		}
420 	}
421 
422 	/*
423 	 * Now fill out the bss section.  First pad the last page up
424 	 * to the page boundary, and then perform a mmap to make sure
425 	 * that there are zero-mapped pages up to and including the
426 	 * last bss page.
427 	 */
428 	if (padzero(elf_bss)) {
429 		error = -EFAULT;
430 		goto out_close;
431 	}
432 
433 	/* What we have mapped so far */
434 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
435 
436 	/* Map the last of the bss segment */
437 	if (last_bss > elf_bss) {
438 		down_write(&current->mm->mmap_sem);
439 		error = do_brk(elf_bss, last_bss - elf_bss);
440 		up_write(&current->mm->mmap_sem);
441 		if (BAD_ADDR(error))
442 			goto out_close;
443 	}
444 
445 	*interp_load_addr = load_addr;
446 	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
447 
448 out_close:
449 	kfree(elf_phdata);
450 out:
451 	return error;
452 }
453 
454 static unsigned long load_aout_interp(struct exec *interp_ex,
455 		struct file *interpreter)
456 {
457 	unsigned long text_data, elf_entry = ~0UL;
458 	char __user * addr;
459 	loff_t offset;
460 
461 	current->mm->end_code = interp_ex->a_text;
462 	text_data = interp_ex->a_text + interp_ex->a_data;
463 	current->mm->end_data = text_data;
464 	current->mm->brk = interp_ex->a_bss + text_data;
465 
466 	switch (N_MAGIC(*interp_ex)) {
467 	case OMAGIC:
468 		offset = 32;
469 		addr = (char __user *)0;
470 		break;
471 	case ZMAGIC:
472 	case QMAGIC:
473 		offset = N_TXTOFF(*interp_ex);
474 		addr = (char __user *)N_TXTADDR(*interp_ex);
475 		break;
476 	default:
477 		goto out;
478 	}
479 
480 	down_write(&current->mm->mmap_sem);
481 	do_brk(0, text_data);
482 	up_write(&current->mm->mmap_sem);
483 	if (!interpreter->f_op || !interpreter->f_op->read)
484 		goto out;
485 	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
486 		goto out;
487 	flush_icache_range((unsigned long)addr,
488 	                   (unsigned long)addr + text_data);
489 
490 	down_write(&current->mm->mmap_sem);
491 	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
492 		interp_ex->a_bss);
493 	up_write(&current->mm->mmap_sem);
494 	elf_entry = interp_ex->a_entry;
495 
496 out:
497 	return elf_entry;
498 }
499 
500 /*
501  * These are the functions used to load ELF style executables and shared
502  * libraries.  There is no binary dependent code anywhere else.
503  */
504 
505 #define INTERPRETER_NONE 0
506 #define INTERPRETER_AOUT 1
507 #define INTERPRETER_ELF 2
508 
509 #ifndef STACK_RND_MASK
510 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
511 #endif
512 
513 static unsigned long randomize_stack_top(unsigned long stack_top)
514 {
515 	unsigned int random_variable = 0;
516 
517 	if ((current->flags & PF_RANDOMIZE) &&
518 		!(current->personality & ADDR_NO_RANDOMIZE)) {
519 		random_variable = get_random_int() & STACK_RND_MASK;
520 		random_variable <<= PAGE_SHIFT;
521 	}
522 #ifdef CONFIG_STACK_GROWSUP
523 	return PAGE_ALIGN(stack_top) + random_variable;
524 #else
525 	return PAGE_ALIGN(stack_top) - random_variable;
526 #endif
527 }
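
/*
 * Illustrative only, with 4K pages (PAGE_SHIFT == 12): STACK_RND_MASK
 * is 0x7ff, so random_variable is at most 0x7ff << 12 == 0x7ff000
 * bytes, i.e. the stack top moves by just under 8MB in page-sized
 * steps.
 */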
528 
529 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
530 {
531 	struct file *interpreter = NULL; /* to shut gcc up */
532  	unsigned long load_addr = 0, load_bias = 0;
533 	int load_addr_set = 0;
534 	char * elf_interpreter = NULL;
535 	unsigned int interpreter_type = INTERPRETER_NONE;
536 	unsigned char ibcs2_interpreter = 0;
537 	unsigned long error;
538 	struct elf_phdr *elf_ppnt, *elf_phdata;
539 	unsigned long elf_bss, elf_brk;
540 	int elf_exec_fileno;
541 	int retval, i;
542 	unsigned int size;
543 	unsigned long elf_entry, interp_load_addr = 0;
544 	unsigned long start_code, end_code, start_data, end_data;
545 	unsigned long reloc_func_desc = 0;
546 	char passed_fileno[6];
547 	struct files_struct *files;
548 	int executable_stack = EXSTACK_DEFAULT;
549 	unsigned long def_flags = 0;
550 	struct {
551 		struct elfhdr elf_ex;
552 		struct elfhdr interp_elf_ex;
553   		struct exec interp_ex;
554 	} *loc;
555 
556 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
557 	if (!loc) {
558 		retval = -ENOMEM;
559 		goto out_ret;
560 	}
561 
562 	/* Get the exec-header */
563 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
564 
565 	retval = -ENOEXEC;
566 	/* First of all, some simple consistency checks */
567 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
568 		goto out;
569 
570 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
571 		goto out;
572 	if (!elf_check_arch(&loc->elf_ex))
573 		goto out;
574 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
575 		goto out;
576 
577 	/* Now read in all of the header information */
578 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
579 		goto out;
580 	if (loc->elf_ex.e_phnum < 1 ||
581 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
582 		goto out;
583 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
584 	retval = -ENOMEM;
585 	elf_phdata = kmalloc(size, GFP_KERNEL);
586 	if (!elf_phdata)
587 		goto out;
588 
589 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
590 			     (char *)elf_phdata, size);
591 	if (retval != size) {
592 		if (retval >= 0)
593 			retval = -EIO;
594 		goto out_free_ph;
595 	}
596 
597 	files = current->files;	/* Refcounted so ok */
598 	retval = unshare_files();
599 	if (retval < 0)
600 		goto out_free_ph;
601 	if (files == current->files) {
602 		put_files_struct(files);
603 		files = NULL;
604 	}
605 
606 	/* exec will make our files private anyway, but for the a.out
607 	   loader stuff we need to do it earlier */
608 	retval = get_unused_fd();
609 	if (retval < 0)
610 		goto out_free_fh;
611 	get_file(bprm->file);
612 	fd_install(elf_exec_fileno = retval, bprm->file);
613 
614 	elf_ppnt = elf_phdata;
615 	elf_bss = 0;
616 	elf_brk = 0;
617 
618 	start_code = ~0UL;
619 	end_code = 0;
620 	start_data = 0;
621 	end_data = 0;
622 
623 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
624 		if (elf_ppnt->p_type == PT_INTERP) {
625 			/* This is the program interpreter used for
626 			 * shared libraries - for now assume that this
627 			 * is an a.out format binary
628 			 */
629 			retval = -ENOEXEC;
630 			if (elf_ppnt->p_filesz > PATH_MAX ||
631 			    elf_ppnt->p_filesz < 2)
632 				goto out_free_file;
633 
634 			retval = -ENOMEM;
635 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
636 						  GFP_KERNEL);
637 			if (!elf_interpreter)
638 				goto out_free_file;
639 
640 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
641 					     elf_interpreter,
642 					     elf_ppnt->p_filesz);
643 			if (retval != elf_ppnt->p_filesz) {
644 				if (retval >= 0)
645 					retval = -EIO;
646 				goto out_free_interp;
647 			}
648 			/* make sure the path is NUL-terminated */
649 			retval = -ENOEXEC;
650 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
651 				goto out_free_interp;
652 
653 			/* If the program interpreter is one of these two,
654 			 * then assume an iBCS2 image. Otherwise assume
655 			 * a native linux image.
656 			 */
657 			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
658 			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
659 				ibcs2_interpreter = 1;
660 
661 			/*
662 			 * The early SET_PERSONALITY here is so that the lookup
663 			 * for the interpreter happens in the namespace of the
664 			 * to-be-execed image.  SET_PERSONALITY can select an
665 			 * alternate root.
666 			 *
667 			 * However, SET_PERSONALITY is NOT allowed to switch
668 			 * this task into the new image's memory mapping
669 			 * policy - that is, TASK_SIZE must still evaluate to
670 			 * that which is appropriate to the execing application.
671 			 * This is because exit_mmap() needs to have TASK_SIZE
672 			 * evaluate to the size of the old image.
673 			 *
674 			 * So if (say) a 64-bit application is execing a 32-bit
675 			 * application it is the architecture's responsibility
676 			 * to defer changing the value of TASK_SIZE until the
677 			 * switch really is going to happen - do this in
678 			 * flush_thread().	- akpm
679 			 */
680 			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
681 
682 			interpreter = open_exec(elf_interpreter);
683 			retval = PTR_ERR(interpreter);
684 			if (IS_ERR(interpreter))
685 				goto out_free_interp;
686 
687 			/*
688 			 * If the binary is not readable then enforce
689 			 * mm->dumpable = 0 regardless of the interpreter's
690 			 * permissions.
691 			 */
692 			if (file_permission(interpreter, MAY_READ) < 0)
693 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
694 
695 			retval = kernel_read(interpreter, 0, bprm->buf,
696 					     BINPRM_BUF_SIZE);
697 			if (retval != BINPRM_BUF_SIZE) {
698 				if (retval >= 0)
699 					retval = -EIO;
700 				goto out_free_dentry;
701 			}
702 
703 			/* Get the exec headers */
704 			loc->interp_ex = *((struct exec *)bprm->buf);
705 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
706 			break;
707 		}
708 		elf_ppnt++;
709 	}
710 
711 	elf_ppnt = elf_phdata;
712 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
713 		if (elf_ppnt->p_type == PT_GNU_STACK) {
714 			if (elf_ppnt->p_flags & PF_X)
715 				executable_stack = EXSTACK_ENABLE_X;
716 			else
717 				executable_stack = EXSTACK_DISABLE_X;
718 			break;
719 		}
720 
721 	/* Some simple consistency checks for the interpreter */
722 	if (elf_interpreter) {
723 		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
724 
725 		/* Now figure out which format our binary is */
726 		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
727 		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
728 		    (N_MAGIC(loc->interp_ex) != QMAGIC))
729 			interpreter_type = INTERPRETER_ELF;
730 
731 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
732 			interpreter_type &= ~INTERPRETER_ELF;
733 
734 		retval = -ELIBBAD;
735 		if (!interpreter_type)
736 			goto out_free_dentry;
737 
738 		/* Make sure only one type was selected */
739 		if ((interpreter_type & INTERPRETER_ELF) &&
740 		     interpreter_type != INTERPRETER_ELF) {
741 	     		// FIXME - ratelimit this before re-enabling
742 			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
743 			interpreter_type = INTERPRETER_ELF;
744 		}
745 		/* Verify the interpreter has a valid arch */
746 		if ((interpreter_type == INTERPRETER_ELF) &&
747 		    !elf_check_arch(&loc->interp_elf_ex))
748 			goto out_free_dentry;
749 	} else {
750 		/* Executables without an interpreter also need a personality  */
751 		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
752 	}
753 
754 	/* OK, we are done with that, now set up the arg stuff,
755 	   and then start this sucker up */
756 	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
757 		char *passed_p = passed_fileno;
758 		sprintf(passed_fileno, "%d", elf_exec_fileno);
759 
760 		if (elf_interpreter) {
761 			retval = copy_strings_kernel(1, &passed_p, bprm);
762 			if (retval)
763 				goto out_free_dentry;
764 			bprm->argc++;
765 		}
766 	}
767 
768 	/* Flush all traces of the currently running executable */
769 	retval = flush_old_exec(bprm);
770 	if (retval)
771 		goto out_free_dentry;
772 
773 	/* Discard our unneeded old files struct */
774 	if (files) {
775 		put_files_struct(files);
776 		files = NULL;
777 	}
778 
779 	/* OK, this is the point of no return */
780 	current->mm->start_data = 0;
781 	current->mm->end_data = 0;
782 	current->mm->end_code = 0;
783 	current->mm->mmap = NULL;
784 	current->flags &= ~PF_FORKNOEXEC;
785 	current->mm->def_flags = def_flags;
786 
787 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
788 	   may depend on the personality.  */
789 	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
790 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
791 		current->personality |= READ_IMPLIES_EXEC;
792 
793 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
794 		current->flags |= PF_RANDOMIZE;
795 	arch_pick_mmap_layout(current->mm);
796 
797 	/* Do this so that we can load the interpreter, if need be.  We will
798 	   change some of these later */
799 	current->mm->free_area_cache = current->mm->mmap_base;
800 	current->mm->cached_hole_size = 0;
801 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
802 				 executable_stack);
803 	if (retval < 0) {
804 		send_sig(SIGKILL, current, 0);
805 		goto out_free_dentry;
806 	}
807 
808 	current->mm->start_stack = bprm->p;
809 
810 	/* Now we do a little grungy work by mmapping the ELF image into
811 	   the correct location in memory.  At this point, we assume that
812 	   the image should be loaded at a fixed address, not at a variable
813 	   address. */
814 	for(i = 0, elf_ppnt = elf_phdata;
815 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
816 		int elf_prot = 0, elf_flags;
817 		unsigned long k, vaddr;
818 
819 		if (elf_ppnt->p_type != PT_LOAD)
820 			continue;
821 
822 		if (unlikely (elf_brk > elf_bss)) {
823 			unsigned long nbyte;
824 
825 			/* There was a PT_LOAD segment with p_memsz > p_filesz
826 			   before this one. Map anonymous pages, if needed,
827 			   and clear the area.  */
828 			retval = set_brk (elf_bss + load_bias,
829 					  elf_brk + load_bias);
830 			if (retval) {
831 				send_sig(SIGKILL, current, 0);
832 				goto out_free_dentry;
833 			}
834 			nbyte = ELF_PAGEOFFSET(elf_bss);
835 			if (nbyte) {
836 				nbyte = ELF_MIN_ALIGN - nbyte;
837 				if (nbyte > elf_brk - elf_bss)
838 					nbyte = elf_brk - elf_bss;
839 				if (clear_user((void __user *)elf_bss +
840 							load_bias, nbyte)) {
841 					/*
842 					 * This bss-zeroing can fail if the ELF
843 					 * file specifies odd protections. So
844 					 * we don't check the return value
845 					 */
846 				}
847 			}
848 		}
849 
850 		if (elf_ppnt->p_flags & PF_R)
851 			elf_prot |= PROT_READ;
852 		if (elf_ppnt->p_flags & PF_W)
853 			elf_prot |= PROT_WRITE;
854 		if (elf_ppnt->p_flags & PF_X)
855 			elf_prot |= PROT_EXEC;
856 
857 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
858 
859 		vaddr = elf_ppnt->p_vaddr;
860 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
861 			elf_flags |= MAP_FIXED;
862 		} else if (loc->elf_ex.e_type == ET_DYN) {
863 			/* Try and get dynamic programs out of the way of the
864 			 * default mmap base, as well as whatever program they
865 			 * might try to exec.  This is because the brk will
866 			 * follow the loader, and is not movable.  */
867 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
868 		}
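
		/*
		 * Illustrative only: if ELF_ET_DYN_BASE were 0x40000000 and
		 * the first PT_LOAD had p_vaddr == 0, load_bias would start
		 * out as 0x40000000; it is corrected below once the kernel
		 * sees where the first segment was actually mapped.
		 */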
869 
870 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
871 				elf_prot, elf_flags);
872 		if (BAD_ADDR(error)) {
873 			send_sig(SIGKILL, current, 0);
874 			retval = IS_ERR((void *)error) ?
875 				PTR_ERR((void*)error) : -EINVAL;
876 			goto out_free_dentry;
877 		}
878 
879 		if (!load_addr_set) {
880 			load_addr_set = 1;
881 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
882 			if (loc->elf_ex.e_type == ET_DYN) {
883 				load_bias += error -
884 				             ELF_PAGESTART(load_bias + vaddr);
885 				load_addr += load_bias;
886 				reloc_func_desc = load_bias;
887 			}
888 		}
889 		k = elf_ppnt->p_vaddr;
890 		if (k < start_code)
891 			start_code = k;
892 		if (start_data < k)
893 			start_data = k;
894 
895 		/*
896 		 * Check to see if the section's size will overflow the
897 		 * allowed task size. Note that p_filesz must always be
898 		 * <= p_memsz so it is only necessary to check p_memsz.
899 		 */
900 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
901 		    elf_ppnt->p_memsz > TASK_SIZE ||
902 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
903 			/* set_brk can never work. Avoid overflows. */
904 			send_sig(SIGKILL, current, 0);
905 			retval = -EINVAL;
906 			goto out_free_dentry;
907 		}
908 
909 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
910 
911 		if (k > elf_bss)
912 			elf_bss = k;
913 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
914 			end_code = k;
915 		if (end_data < k)
916 			end_data = k;
917 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
918 		if (k > elf_brk)
919 			elf_brk = k;
920 	}
921 
922 	loc->elf_ex.e_entry += load_bias;
923 	elf_bss += load_bias;
924 	elf_brk += load_bias;
925 	start_code += load_bias;
926 	end_code += load_bias;
927 	start_data += load_bias;
928 	end_data += load_bias;
929 
930 	/* Calling set_brk effectively mmaps the pages that we need
931 	 * for the bss and break sections.  We must do this before
932 	 * mapping in the interpreter, to make sure it doesn't wind
933 	 * up getting placed where the bss needs to go.
934 	 */
935 	retval = set_brk(elf_bss, elf_brk);
936 	if (retval) {
937 		send_sig(SIGKILL, current, 0);
938 		goto out_free_dentry;
939 	}
940 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
941 		send_sig(SIGSEGV, current, 0);
942 		retval = -EFAULT; /* Nobody gets to see this, but.. */
943 		goto out_free_dentry;
944 	}
945 
946 	if (elf_interpreter) {
947 		if (interpreter_type == INTERPRETER_AOUT)
948 			elf_entry = load_aout_interp(&loc->interp_ex,
949 						     interpreter);
950 		else
951 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
952 						    interpreter,
953 						    &interp_load_addr);
954 		if (BAD_ADDR(elf_entry)) {
955 			force_sig(SIGSEGV, current);
956 			retval = IS_ERR((void *)elf_entry) ?
957 					(int)elf_entry : -EINVAL;
958 			goto out_free_dentry;
959 		}
960 		reloc_func_desc = interp_load_addr;
961 
962 		allow_write_access(interpreter);
963 		fput(interpreter);
964 		kfree(elf_interpreter);
965 	} else {
966 		elf_entry = loc->elf_ex.e_entry;
967 		if (BAD_ADDR(elf_entry)) {
968 			force_sig(SIGSEGV, current);
969 			retval = -EINVAL;
970 			goto out_free_dentry;
971 		}
972 	}
973 
974 	kfree(elf_phdata);
975 
976 	if (interpreter_type != INTERPRETER_AOUT)
977 		sys_close(elf_exec_fileno);
978 
979 	set_binfmt(&elf_format);
980 
981 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
982 	retval = arch_setup_additional_pages(bprm, executable_stack);
983 	if (retval < 0) {
984 		send_sig(SIGKILL, current, 0);
985 		goto out;
986 	}
987 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
988 
989 	compute_creds(bprm);
990 	current->flags &= ~PF_FORKNOEXEC;
991 	create_elf_tables(bprm, &loc->elf_ex,
992 			  (interpreter_type == INTERPRETER_AOUT),
993 			  load_addr, interp_load_addr);
994 	/* N.B. passed_fileno might not be initialized? */
995 	if (interpreter_type == INTERPRETER_AOUT)
996 		current->mm->arg_start += strlen(passed_fileno) + 1;
997 	current->mm->end_code = end_code;
998 	current->mm->start_code = start_code;
999 	current->mm->start_data = start_data;
1000 	current->mm->end_data = end_data;
1001 	current->mm->start_stack = bprm->p;
1002 
1003 	if (current->personality & MMAP_PAGE_ZERO) {
1004 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1005 		   and some applications "depend" upon this behavior.
1006 		   Since we do not have the power to recompile these, we
1007 		   emulate the SVr4 behavior. Sigh. */
1008 		down_write(&current->mm->mmap_sem);
1009 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1010 				MAP_FIXED | MAP_PRIVATE, 0);
1011 		up_write(&current->mm->mmap_sem);
1012 	}
1013 
1014 #ifdef ELF_PLAT_INIT
1015 	/*
1016 	 * The ABI may specify that certain registers be set up in special
1017 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1018 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1019 	 * that the e_entry field is the address of the function descriptor
1020 	 * for the startup routine, rather than the address of the startup
1021 	 * routine itself.  This macro performs whatever initialization to
1022 	 * the regs structure is required as well as any relocations to the
1023 	 * function descriptor entries when executing dynamically linked apps.
1024 	 */
1025 	ELF_PLAT_INIT(regs, reloc_func_desc);
1026 #endif
1027 
1028 	start_thread(regs, elf_entry, bprm->p);
1029 	if (unlikely(current->ptrace & PT_PTRACED)) {
1030 		if (current->ptrace & PT_TRACE_EXEC)
1031 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1032 		else
1033 			send_sig(SIGTRAP, current, 0);
1034 	}
1035 	retval = 0;
1036 out:
1037 	kfree(loc);
1038 out_ret:
1039 	return retval;
1040 
1041 	/* error cleanup */
1042 out_free_dentry:
1043 	allow_write_access(interpreter);
1044 	if (interpreter)
1045 		fput(interpreter);
1046 out_free_interp:
1047 	kfree(elf_interpreter);
1048 out_free_file:
1049 	sys_close(elf_exec_fileno);
1050 out_free_fh:
1051 	if (files)
1052 		reset_files_struct(current, files);
1053 out_free_ph:
1054 	kfree(elf_phdata);
1055 	goto out;
1056 }
1057 
1058 /* This is really simpleminded and specialized - we are loading a
1059    library via the a.out uselib() interface, but one with an ELF header. */
1060 static int load_elf_library(struct file *file)
1061 {
1062 	struct elf_phdr *elf_phdata;
1063 	struct elf_phdr *eppnt;
1064 	unsigned long elf_bss, bss, len;
1065 	int retval, error, i, j;
1066 	struct elfhdr elf_ex;
1067 
1068 	error = -ENOEXEC;
1069 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1070 	if (retval != sizeof(elf_ex))
1071 		goto out;
1072 
1073 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1074 		goto out;
1075 
1076 	/* First of all, some simple consistency checks */
1077 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1078 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1079 		goto out;
1080 
1081 	/* Now read in all of the header information */
1082 
1083 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1084 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1085 
1086 	error = -ENOMEM;
1087 	elf_phdata = kmalloc(j, GFP_KERNEL);
1088 	if (!elf_phdata)
1089 		goto out;
1090 
1091 	eppnt = elf_phdata;
1092 	error = -ENOEXEC;
1093 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1094 	if (retval != j)
1095 		goto out_free_ph;
1096 
1097 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1098 		if ((eppnt + i)->p_type == PT_LOAD)
1099 			j++;
1100 	if (j != 1)
1101 		goto out_free_ph;
1102 
1103 	while (eppnt->p_type != PT_LOAD)
1104 		eppnt++;
1105 
1106 	/* Now use mmap to map the library into memory. */
1107 	down_write(&current->mm->mmap_sem);
1108 	error = do_mmap(file,
1109 			ELF_PAGESTART(eppnt->p_vaddr),
1110 			(eppnt->p_filesz +
1111 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1112 			PROT_READ | PROT_WRITE | PROT_EXEC,
1113 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1114 			(eppnt->p_offset -
1115 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1116 	up_write(&current->mm->mmap_sem);
1117 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1118 		goto out_free_ph;
1119 
1120 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1121 	if (padzero(elf_bss)) {
1122 		error = -EFAULT;
1123 		goto out_free_ph;
1124 	}
1125 
1126 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1127 			    ELF_MIN_ALIGN - 1);
1128 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1129 	if (bss > len) {
1130 		down_write(&current->mm->mmap_sem);
1131 		do_brk(len, bss - len);
1132 		up_write(&current->mm->mmap_sem);
1133 	}
1134 	error = 0;
1135 
1136 out_free_ph:
1137 	kfree(elf_phdata);
1138 out:
1139 	return error;
1140 }
1141 
1142 /*
1143  * Note that some platforms still use traditional core dumps and not
1144  * the ELF core dump.  Each platform can select it as appropriate.
1145  */
1146 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1147 
1148 /*
1149  * ELF core dumper
1150  *
1151  * Modelled on fs/exec.c:aout_core_dump()
1152  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1153  */
1154 /*
1155  * These are the only things you should do on a core-file: use only these
1156  * functions to write out all the necessary info.
1157  */
1158 static int dump_write(struct file *file, const void *addr, int nr)
1159 {
1160 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1161 }
1162 
1163 static int dump_seek(struct file *file, loff_t off)
1164 {
1165 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1166 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1167 			return 0;
1168 	} else {
1169 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1170 		if (!buf)
1171 			return 0;
1172 		while (off > 0) {
1173 			unsigned long n = off;
1174 			if (n > PAGE_SIZE)
1175 				n = PAGE_SIZE;
1176 			if (!dump_write(file, buf, n)) {
				free_page((unsigned long)buf);	/* don't leak the page */
				return 0;
1177 			}
1178 			off -= n;
1179 		}
1180 		free_page((unsigned long)buf);
1181 	}
1182 	return 1;
1183 }
1184 
1185 /*
1186  * Decide whether a segment is worth dumping; default is yes to be
1187  * sure (missing info is worse than too much; etc).
1188  * sure (missing info is worse than too much).
1189  *
1190  * I think we should skip something. But I am not sure how. H.J.
1191  */
1192 static int maydump(struct vm_area_struct *vma)
1193 {
1194 	/* The vma can be set up to tell us the answer directly.  */
1195 	if (vma->vm_flags & VM_ALWAYSDUMP)
1196 		return 1;
1197 
1198 	/* Do not dump I/O mapped devices or special mappings */
1199 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1200 		return 0;
1201 
1202 	/* Dump shared memory only if mapped from an anonymous file. */
1203 	if (vma->vm_flags & VM_SHARED)
1204 		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1205 
1206 	/* If it hasn't been written to, don't write it out */
1207 	if (!vma->anon_vma)
1208 		return 0;
1209 
1210 	return 1;
1211 }
1212 
1213 /* An ELF note in memory */
1214 struct memelfnote
1215 {
1216 	const char *name;
1217 	int type;
1218 	unsigned int datasz;
1219 	void *data;
1220 };
1221 
1222 static int notesize(struct memelfnote *en)
1223 {
1224 	int sz;
1225 
1226 	sz = sizeof(struct elf_note);
1227 	sz += roundup(strlen(en->name) + 1, 4);
1228 	sz += roundup(en->datasz, 4);
1229 
1230 	return sz;
1231 }
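
/*
 * Illustrative only: a "CORE"/NT_PRSTATUS note comes to
 * sizeof(struct elf_note) == 12 header bytes, plus the name "CORE" and
 * its NUL (5 bytes, rounded up to 8), plus sizeof(struct elf_prstatus)
 * rounded up to a multiple of 4 - exactly the layout writenote() emits
 * below.
 */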
1232 
1233 #define DUMP_WRITE(addr, nr, foffset)	\
1234 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1235 
1236 static int alignfile(struct file *file, loff_t *foffset)
1237 {
1238 	static const char buf[4] = { 0, };
1239 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1240 	return 1;
1241 }
1242 
1243 static int writenote(struct memelfnote *men, struct file *file,
1244 			loff_t *foffset)
1245 {
1246 	struct elf_note en;
1247 	en.n_namesz = strlen(men->name) + 1;
1248 	en.n_descsz = men->datasz;
1249 	en.n_type = men->type;
1250 
1251 	DUMP_WRITE(&en, sizeof(en), foffset);
1252 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1253 	if (!alignfile(file, foffset))
1254 		return 0;
1255 	DUMP_WRITE(men->data, men->datasz, foffset);
1256 	if (!alignfile(file, foffset))
1257 		return 0;
1258 
1259 	return 1;
1260 }
1261 #undef DUMP_WRITE
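
/*
 * On-disk layout produced by writenote(), per the ELF note format:
 *
 *	+-----------+-----------+-----------+
 *	| n_namesz  | n_descsz  | n_type    |  struct elf_note
 *	+-----------+-----------+-----------+
 *	| name, NUL-terminated, padded to 4 |
 *	+-----------------------------------+
 *	| descriptor data, padded to 4      |
 *	+-----------------------------------+
 */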
1262 
1263 #define DUMP_WRITE(addr, nr)	\
1264 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1265 		goto end_coredump;
1266 #define DUMP_SEEK(off)	\
1267 	if (!dump_seek(file, (off))) \
1268 		goto end_coredump;
1269 
1270 static void fill_elf_header(struct elfhdr *elf, int segs)
1271 {
1272 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1273 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1274 	elf->e_ident[EI_DATA] = ELF_DATA;
1275 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1276 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1277 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1278 
1279 	elf->e_type = ET_CORE;
1280 	elf->e_machine = ELF_ARCH;
1281 	elf->e_version = EV_CURRENT;
1282 	elf->e_entry = 0;
1283 	elf->e_phoff = sizeof(struct elfhdr);
1284 	elf->e_shoff = 0;
1285 	elf->e_flags = ELF_CORE_EFLAGS;
1286 	elf->e_ehsize = sizeof(struct elfhdr);
1287 	elf->e_phentsize = sizeof(struct elf_phdr);
1288 	elf->e_phnum = segs;
1289 	elf->e_shentsize = 0;
1290 	elf->e_shnum = 0;
1291 	elf->e_shstrndx = 0;
1292 	return;
1293 }
1294 
1295 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1296 {
1297 	phdr->p_type = PT_NOTE;
1298 	phdr->p_offset = offset;
1299 	phdr->p_vaddr = 0;
1300 	phdr->p_paddr = 0;
1301 	phdr->p_filesz = sz;
1302 	phdr->p_memsz = 0;
1303 	phdr->p_flags = 0;
1304 	phdr->p_align = 0;
1305 	return;
1306 }
1307 
1308 static void fill_note(struct memelfnote *note, const char *name, int type,
1309 		unsigned int sz, void *data)
1310 {
1311 	note->name = name;
1312 	note->type = type;
1313 	note->datasz = sz;
1314 	note->data = data;
1315 	return;
1316 }
1317 
1318 /*
1319  * fill up all the fields in prstatus from the given task struct, except
1320  * registers which need to be filled up separately.
1321  */
1322 static void fill_prstatus(struct elf_prstatus *prstatus,
1323 		struct task_struct *p, long signr)
1324 {
1325 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1326 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1327 	prstatus->pr_sighold = p->blocked.sig[0];
1328 	prstatus->pr_pid = p->pid;
1329 	prstatus->pr_ppid = p->parent->pid;
1330 	prstatus->pr_pgrp = process_group(p);
1331 	prstatus->pr_sid = process_session(p);
1332 	if (thread_group_leader(p)) {
1333 		/*
1334 		 * This is the record for the group leader.  Add in the
1335 		 * cumulative times of previous dead threads.  This total
1336 		 * won't include the time of each live thread whose state
1337 		 * is included in the core dump.  The final total reported
1338 		 * to our parent process when it calls wait4 will include
1339 		 * those sums as well as the little bit more time it takes
1340 		 * this and each other thread to finish dying after the
1341 		 * core dump synchronization phase.
1342 		 */
1343 		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1344 				   &prstatus->pr_utime);
1345 		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1346 				   &prstatus->pr_stime);
1347 	} else {
1348 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1349 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1350 	}
1351 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1352 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1353 }
1354 
1355 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1356 		       struct mm_struct *mm)
1357 {
1358 	unsigned int i, len;
1359 
1360 	/* first copy the parameters from user space */
1361 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1362 
1363 	len = mm->arg_end - mm->arg_start;
1364 	if (len >= ELF_PRARGSZ)
1365 		len = ELF_PRARGSZ-1;
1366 	if (copy_from_user(&psinfo->pr_psargs,
1367 		           (const char __user *)mm->arg_start, len))
1368 		return -EFAULT;
1369 	for(i = 0; i < len; i++)
1370 		if (psinfo->pr_psargs[i] == 0)
1371 			psinfo->pr_psargs[i] = ' ';
1372 	psinfo->pr_psargs[len] = 0;
1373 
1374 	psinfo->pr_pid = p->pid;
1375 	psinfo->pr_ppid = p->parent->pid;
1376 	psinfo->pr_pgrp = process_group(p);
1377 	psinfo->pr_sid = process_session(p);
1378 
1379 	i = p->state ? ffz(~p->state) + 1 : 0;
1380 	psinfo->pr_state = i;
1381 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1382 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1383 	psinfo->pr_nice = task_nice(p);
1384 	psinfo->pr_flag = p->flags;
1385 	SET_UID(psinfo->pr_uid, p->uid);
1386 	SET_GID(psinfo->pr_gid, p->gid);
1387 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1388 
1389 	return 0;
1390 }
1391 
1392 /* Here is the structure in which status of each thread is captured. */
1393 struct elf_thread_status
1394 {
1395 	struct list_head list;
1396 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1397 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1398 	struct task_struct *thread;
1399 #ifdef ELF_CORE_COPY_XFPREGS
1400 	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
1401 #endif
1402 	struct memelfnote notes[3];
1403 	int num_notes;
1404 };
1405 
1406 /*
1407  * In order to add the specific thread information for the elf file format,
1408  * we need to keep a linked list of every threads pr_status and then create
1409  * a single section for them in the final core file.
1410  */
1411 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1412 {
1413 	int sz = 0;
1414 	struct task_struct *p = t->thread;
1415 	t->num_notes = 0;
1416 
1417 	fill_prstatus(&t->prstatus, p, signr);
1418 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1419 
1420 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1421 		  &(t->prstatus));
1422 	t->num_notes++;
1423 	sz += notesize(&t->notes[0]);
1424 
1425 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1426 								&t->fpu))) {
1427 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1428 			  &(t->fpu));
1429 		t->num_notes++;
1430 		sz += notesize(&t->notes[1]);
1431 	}
1432 
1433 #ifdef ELF_CORE_COPY_XFPREGS
1434 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1435 		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1436 			  &t->xfpu);
1437 		t->num_notes++;
1438 		sz += notesize(&t->notes[2]);
1439 	}
1440 #endif
1441 	return sz;
1442 }
1443 
1444 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1445 					struct vm_area_struct *gate_vma)
1446 {
1447 	struct vm_area_struct *ret = tsk->mm->mmap;
1448 
1449 	if (ret)
1450 		return ret;
1451 	return gate_vma;
1452 }
1453 /*
1454  * Helper function for iterating across a vma list.  It ensures that the caller
1455  * will visit `gate_vma' prior to terminating the search.
1456  */
1457 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1458 					struct vm_area_struct *gate_vma)
1459 {
1460 	struct vm_area_struct *ret;
1461 
1462 	ret = this_vma->vm_next;
1463 	if (ret)
1464 		return ret;
1465 	if (this_vma == gate_vma)
1466 		return NULL;
1467 	return gate_vma;
1468 }
1469 
1470 /*
1471  * Actual dumper
1472  *
1473  * This is a two-pass process; first we find the offsets of the bits,
1474  * and then they are actually written out.  If we run out of core limit
1475  * we just truncate.
1476  */
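
/*
 * Resulting core file layout (sketch):
 *
 *	ELF header
 *	program headers: one PT_NOTE, then one PT_LOAD per vma
 *	note data (prstatus, psinfo, auxv, fpu, per-thread status)
 *	padding up to ELF_EXEC_PAGESIZE
 *	vma contents, one page-aligned blob per dumped segment
 */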
1477 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1478 {
1479 #define	NUM_NOTES	6
1480 	int has_dumped = 0;
1481 	mm_segment_t fs;
1482 	int segs;
1483 	size_t size = 0;
1484 	int i;
1485 	struct vm_area_struct *vma, *gate_vma;
1486 	struct elfhdr *elf = NULL;
1487 	loff_t offset = 0, dataoff, foffset;
1488 	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1489 	int numnote;
1490 	struct memelfnote *notes = NULL;
1491 	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
1492 	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
1493  	struct task_struct *g, *p;
1494  	LIST_HEAD(thread_list);
1495  	struct list_head *t;
1496 	elf_fpregset_t *fpu = NULL;
1497 #ifdef ELF_CORE_COPY_XFPREGS
1498 	elf_fpxregset_t *xfpu = NULL;
1499 #endif
1500 	int thread_status_size = 0;
1501 	elf_addr_t *auxv;
1502 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1503 	int extra_notes_size;
1504 #endif
1505 
1506 	/*
1507 	 * We no longer stop all VM operations.
1508 	 *
1509 	 * This is because those processes that could possibly change map_count
1510 	 * or the mmap / vma pages are now blocked in do_exit on current
1511 	 * finishing this core dump.
1512 	 *
1513 	 * Only ptrace can touch these memory addresses, but it doesn't change
1514 	 * the map_count or the pages allocated. So no possibility of crashing
1515 	 * exists while dumping the mm->vm_next areas to the core file.
1516 	 */
1517 
1518 	/* alloc memory for large data structures: too large to be on stack */
1519 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1520 	if (!elf)
1521 		goto cleanup;
1522 	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1523 	if (!prstatus)
1524 		goto cleanup;
1525 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1526 	if (!psinfo)
1527 		goto cleanup;
1528 	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1529 	if (!notes)
1530 		goto cleanup;
1531 	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1532 	if (!fpu)
1533 		goto cleanup;
1534 #ifdef ELF_CORE_COPY_XFPREGS
1535 	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1536 	if (!xfpu)
1537 		goto cleanup;
1538 #endif
1539 
1540 	if (signr) {
1541 		struct elf_thread_status *tmp;
1542 		rcu_read_lock();
1543 		do_each_thread(g,p)
1544 			if (current->mm == p->mm && current != p) {
1545 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1546 				if (!tmp) {
1547 					rcu_read_unlock();
1548 					goto cleanup;
1549 				}
1550 				tmp->thread = p;
1551 				list_add(&tmp->list, &thread_list);
1552 			}
1553 		while_each_thread(g,p);
1554 		rcu_read_unlock();
1555 		list_for_each(t, &thread_list) {
1556 			struct elf_thread_status *tmp;
1557 			int sz;
1558 
1559 			tmp = list_entry(t, struct elf_thread_status, list);
1560 			sz = elf_dump_thread_status(signr, tmp);
1561 			thread_status_size += sz;
1562 		}
1563 	}
1564 	/* now collect the dump for the current task */
1565 	memset(prstatus, 0, sizeof(*prstatus));
1566 	fill_prstatus(prstatus, current, signr);
1567 	elf_core_copy_regs(&prstatus->pr_reg, regs);
1568 
1569 	segs = current->mm->map_count;
1570 #ifdef ELF_CORE_EXTRA_PHDRS
1571 	segs += ELF_CORE_EXTRA_PHDRS;
1572 #endif
1573 
1574 	gate_vma = get_gate_vma(current);
1575 	if (gate_vma != NULL)
1576 		segs++;
1577 
1578 	/* Set up header */
1579 	fill_elf_header(elf, segs + 1);	/* including notes section */
1580 
1581 	has_dumped = 1;
1582 	current->flags |= PF_DUMPCORE;
1583 
1584 	/*
1585 	 * Set up the notes in similar form to SVR4 core dumps made
1586 	 * with info from their /proc.
1587 	 */
1588 
1589 	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1590 	fill_psinfo(psinfo, current->group_leader, current->mm);
1591 	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1592 
1593 	numnote = 2;
1594 
1595 	auxv = (elf_addr_t *)current->mm->saved_auxv;
1596 
1597 	i = 0;
1598 	do
1599 		i += 2;
1600 	while (auxv[i - 2] != AT_NULL);
1601 	fill_note(&notes[numnote++], "CORE", NT_AUXV,
1602 		  i * sizeof(elf_addr_t), auxv);
1603 
1604   	/* Try to dump the FPU. */
1605 	if ((prstatus->pr_fpvalid =
1606 	     elf_core_copy_task_fpregs(current, regs, fpu)))
1607 		fill_note(notes + numnote++,
1608 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1609 #ifdef ELF_CORE_COPY_XFPREGS
1610 	if (elf_core_copy_task_xfpregs(current, xfpu))
1611 		fill_note(notes + numnote++,
1612 			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1613 #endif
1614 
1615 	fs = get_fs();
1616 	set_fs(KERNEL_DS);
1617 
1618 	DUMP_WRITE(elf, sizeof(*elf));
1619 	offset += sizeof(*elf);				/* Elf header */
1620 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1621 	foffset = offset;
1622 
1623 	/* Write notes phdr entry */
1624 	{
1625 		struct elf_phdr phdr;
1626 		int sz = 0;
1627 
1628 		for (i = 0; i < numnote; i++)
1629 			sz += notesize(notes + i);
1630 
1631 		sz += thread_status_size;
1632 
1633 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1634 		extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1635 		sz += extra_notes_size;
1636 #endif
1637 
1638 		fill_elf_note_phdr(&phdr, sz, offset);
1639 		offset += sz;
1640 		DUMP_WRITE(&phdr, sizeof(phdr));
1641 	}
1642 
1643 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1644 
1645 	/* Write program headers for segments dump */
1646 	for (vma = first_vma(current, gate_vma); vma != NULL;
1647 			vma = next_vma(vma, gate_vma)) {
1648 		struct elf_phdr phdr;
1649 		size_t sz;
1650 
1651 		sz = vma->vm_end - vma->vm_start;
1652 
1653 		phdr.p_type = PT_LOAD;
1654 		phdr.p_offset = offset;
1655 		phdr.p_vaddr = vma->vm_start;
1656 		phdr.p_paddr = 0;
1657 		phdr.p_filesz = maydump(vma) ? sz : 0;
1658 		phdr.p_memsz = sz;
1659 		offset += phdr.p_filesz;
1660 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1661 		if (vma->vm_flags & VM_WRITE)
1662 			phdr.p_flags |= PF_W;
1663 		if (vma->vm_flags & VM_EXEC)
1664 			phdr.p_flags |= PF_X;
1665 		phdr.p_align = ELF_EXEC_PAGESIZE;
1666 
1667 		DUMP_WRITE(&phdr, sizeof(phdr));
1668 	}
1669 
1670 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1671 	ELF_CORE_WRITE_EXTRA_PHDRS;
1672 #endif
1673 
1674  	/* write out the notes section */
1675 	for (i = 0; i < numnote; i++)
1676 		if (!writenote(notes + i, file, &foffset))
1677 			goto end_coredump;
1678 
1679 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1680 	ELF_CORE_WRITE_EXTRA_NOTES;
1681 	foffset += extra_notes_size;
1682 #endif
1683 
1684 	/* write out the thread status notes section */
1685 	list_for_each(t, &thread_list) {
1686 		struct elf_thread_status *tmp =
1687 				list_entry(t, struct elf_thread_status, list);
1688 
1689 		for (i = 0; i < tmp->num_notes; i++)
1690 			if (!writenote(&tmp->notes[i], file, &foffset))
1691 				goto end_coredump;
1692 	}
1693 
1694 	/* Align to page */
1695 	DUMP_SEEK(dataoff - foffset);
1696 
1697 	for (vma = first_vma(current, gate_vma); vma != NULL;
1698 			vma = next_vma(vma, gate_vma)) {
1699 		unsigned long addr;
1700 
1701 		if (!maydump(vma))
1702 			continue;
1703 
1704 		for (addr = vma->vm_start;
1705 		     addr < vma->vm_end;
1706 		     addr += PAGE_SIZE) {
1707 			struct page *page;
1708 			struct vm_area_struct *vma;
1709 
1710 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1711 						&page, &vma) <= 0) {
1712 				DUMP_SEEK(PAGE_SIZE);
1713 			} else {
1714 				if (page == ZERO_PAGE(addr)) {
1715 					if (!dump_seek(file, PAGE_SIZE)) {
1716 						page_cache_release(page);
1717 						goto end_coredump;
1718 					}
1719 				} else {
1720 					void *kaddr;
1721 					flush_cache_page(vma, addr,
1722 							 page_to_pfn(page));
1723 					kaddr = kmap(page);
1724 					if ((size += PAGE_SIZE) > limit ||
1725 					    !dump_write(file, kaddr,
1726 					    PAGE_SIZE)) {
1727 						kunmap(page);
1728 						page_cache_release(page);
1729 						goto end_coredump;
1730 					}
1731 					kunmap(page);
1732 				}
1733 				page_cache_release(page);
1734 			}
1735 		}
1736 	}
1737 
1738 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1739 	ELF_CORE_WRITE_EXTRA_DATA;
1740 #endif
1741 
1742 end_coredump:
1743 	set_fs(fs);
1744 
1745 cleanup:
1746 	while (!list_empty(&thread_list)) {
1747 		struct list_head *tmp = thread_list.next;
1748 		list_del(tmp);
1749 		kfree(list_entry(tmp, struct elf_thread_status, list));
1750 	}
1751 
1752 	kfree(elf);
1753 	kfree(prstatus);
1754 	kfree(psinfo);
1755 	kfree(notes);
1756 	kfree(fpu);
1757 #ifdef ELF_CORE_COPY_XFPREGS
1758 	kfree(xfpu);
1759 #endif
1760 	return has_dumped;
1761 #undef NUM_NOTES
1762 }
1763 
1764 #endif		/* USE_ELF_CORE_DUMP */
1765 
1766 static int __init init_elf_binfmt(void)
1767 {
1768 	return register_binfmt(&elf_format);
1769 }
1770 
1771 static void __exit exit_elf_binfmt(void)
1772 {
1773 	/* Remove the ELF loader. */
1774 	unregister_binfmt(&elf_format);
1775 }
1776 
1777 core_initcall(init_elf_binfmt);
1778 module_exit(exit_elf_binfmt);
1779 MODULE_LICENSE("GPL");