/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
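
/*
 * A worked example of the macros above (assuming, for illustration,
 * ELF_MIN_ALIGN == 0x1000):
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000  (round down to page)
 *	ELF_PAGEOFFSET(0x08048123) == 0x00000123  (offset within page)
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000  (round up to page)
 */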

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
	.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

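/*
 * Map anonymous, zero-filled pages between the (page-aligned) start and
 * end addresses and record the new program break.  A minimal sketch of
 * its effect, assuming a 0x1000-byte page: set_brk(0x0804a123, 0x0804c000)
 * maps pages 0x0804b000-0x0804bfff via do_brk() and then sets
 * current->mm->brk to 0x0804c000.
 */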
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
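/*
 * For illustration, assuming ELF_MIN_ALIGN == 0x1000: padzero(0x0804a123)
 * computes nbyte = 0x1000 - 0x123 = 0xedd and clears the user bytes from
 * 0x0804a123 up to the end of that page (0x0804afff inclusive).
 */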
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
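
/*
 * On the usual downward-growing stack, for example, STACK_ALLOC(sp, 16)
 * moves sp down by 16 bytes and returns the new (lower) address, while
 * STACK_ADD(sp, 4) yields the address four elf_addr_t slots below sp and
 * STACK_ROUND() aligns the result down to a 16-byte boundary.
 */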

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
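
	/*
	 * Each NEW_AUX_ENT() appends one { a_type, a_val } pair to the
	 * auxiliary vector, e.g. { AT_PAGESZ, 4096 }; the vector is later
	 * terminated by an { AT_NULL, 0 } pair from the memset() below.
	 */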

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif
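
	/*
	 * The code below lays out the standard ELF startup block, from
	 * low to high addresses: argc, then the argv[] pointers (NULL
	 * terminated), then the envp[] pointers (NULL terminated), then
	 * the auxiliary vector (terminated by an AT_NULL entry).
	 */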

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
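
/*
 * For example (illustrative numbers, 4 KiB pages): a segment with
 * p_vaddr == 0x08048100 and p_offset == 0x100 has pageoffset == 0x100,
 * so elf_map() maps from file offset 0 at address 0x08048000.  The
 * in-file and in-memory page offsets of loadable ELF segments always
 * agree, which is what makes this rounding legal.
 */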

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
	    interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if this segment's size will overflow
			 * the allowed task size. Note that p_filesz must
			 * always be <= p_memsz so it's only necessary to
			 * check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
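	/*
	 * Illustrative example (4 KiB pages): if the file-backed data ends
	 * at elf_bss == 0x2500 and the memory image ends at last_bss ==
	 * 0x4100, padzero() clears 0x2500-0x2fff, elf_bss is rounded up to
	 * 0x3000, and do_brk() then maps anonymous zero pages from 0x3000
	 * up to last_bss (page-rounded) to back the remainder of the bss.
	 */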
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
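
/*
 * With the default STACK_RND_MASK above and PAGE_SHIFT == 12, for
 * example, the randomization below yields a page-aligned offset in the
 * range 0-0x7ff000, i.e. the stack top moves by up to 8 MiB minus one page.
 */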

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure the path is NUL-terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if this segment's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

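/*
 * Skip forward in the dump file.  When the file supports llseek() this is
 * a real seek (leaving a sparse hole); otherwise the gap is filled by
 * writing zeroed pages so the resulting offsets still line up.
 */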
static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zeroed page on failure */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		return 1;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
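
/*
 * For example, a "CORE"/NT_PRSTATUS note comes to sizeof(struct elf_note)
 * (12 bytes on the usual ABIs) + 8 (the 5-byte name "CORE\0" rounded up
 * to 4-byte alignment) + the prstatus payload rounded up likewise.
 */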

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
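/*
 * The resulting core file layout, as laid down below: the ELF header,
 * one program header per segment plus one for the notes, the note data
 * (prstatus/psinfo/auxv and per-thread notes), then, page-aligned, the
 * contents of every vma that maydump() approved.
 */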
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
		sz += ELF_CORE_EXTRA_NOTES_SIZE;
#endif

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	ELF_CORE_WRITE_EXTRA_NOTES;
#endif

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					if (!dump_seek(file, PAGE_SIZE)) {
						page_cache_release(page);
						goto end_coredump;
					}
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");