/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
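/*
 * Worked example of the rounding macros above, assuming
 * ELF_MIN_ALIGN == 0x1000: for _v == 0x1234, ELF_PAGESTART gives
 * 0x1000 (round down to a page boundary), ELF_PAGEOFFSET gives
 * 0x234 (the offset within the page), and ELF_PAGEALIGN gives
 * 0x2000 (round up to the next boundary).
 */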

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
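/*
 * Note that BAD_ADDR also catches negative errno values that were
 * returned cast to unsigned long (e.g. by a failed do_mmap()), since
 * those land far above TASK_SIZE.
 */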

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These pages would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
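/*
 * For instance, with ELF_MIN_ALIGN == 0x1000 and elf_bss == 0x804a123,
 * padzero() clears the 0xedd bytes from 0x804a123 up to the page
 * boundary at 0x804b000, so stale file data never leaks into the bss.
 */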

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
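/*
 * Example (grows-down case): STACK_ALLOC(sp, len) moves sp down by len
 * bytes and returns the new, lower address, where the caller can copy
 * len bytes.  STACK_ROUND rounds down to a 16-byte boundary, as most
 * ABIs require for the initial stack pointer.
 */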

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
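	/*
	 * Each NEW_AUX_ENT invocation appends one (a_type, a_val) pair to
	 * the auxiliary vector, e.g. { AT_PAGESZ, 4096 }.  The vector is
	 * later copied to the user stack and is terminated by an AT_NULL
	 * pair.
	 */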

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
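/*
 * A sketch of the initial stack that create_elf_tables() builds for a
 * plain ELF binary (grows-down case, lowest address first):
 *
 *	argc
 *	argv[0] ... argv[argc-1], NULL
 *	envp[0] ... envp[n-1], NULL
 *	auxv pairs, terminated by AT_NULL
 *	(the argument/environment strings and the platform name live
 *	 higher up, near the stack top)
 */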

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
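/*
 * Example: a PT_LOAD segment with p_vaddr == 0x8048123 and
 * p_offset == 0x123 is mapped page-aligned: the mapping starts at
 * 0x8048000 with file offset 0, and pageoffset (0x123) extends the
 * length, so the intended bytes land at the intended addresses.
 */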

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
	    interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
	    !(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
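/*
 * With the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT == 12, the
 * random offset is a page-aligned value in [0, 0x7ff000], i.e. the
 * stack top varies over an 8MB window (hence the comment above).
 */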

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure the path is NUL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native Linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;
			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, this is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			if (current->flags & PF_RANDOMIZE)
				load_bias = randomize_range(0x10000,
							    ELF_ET_DYN_BASE,
							    0);
			else
				load_bias = ELF_ET_DYN_BASE;
			load_bias = ELF_PAGESTART(load_bias - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization of
	 * the regs structure is required, as well as any relocations of the
	 * function descriptor entries, when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zeroed page on a short write */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
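/*
 * For example, a "CORE"/NT_PRSTATUS note carrying a struct elf_prstatus
 * occupies sizeof(struct elf_note) + roundup(5, 4) +
 * roundup(sizeof(struct elf_prstatus), 4) bytes: the note header, the
 * name "CORE" plus its NUL padded from 5 to 8 bytes, then the
 * 4-byte-aligned descriptor.
 */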

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current task */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

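	/*
	 * Count the auxv words saved at exec time, two per entry, up to
	 * and including the terminating AT_NULL pair.
	 */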
	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
		sz += ELF_CORE_EXTRA_NOTES_SIZE;
#endif

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	ELF_CORE_WRITE_EXTRA_NOTES;
#endif

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK(PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");