/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
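
/*
 * Worked example (assuming ELF_MIN_ALIGN == 0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page base)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page)
 */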

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
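
/*
 * Note: do_brk() returns the start address of the new mapping on
 * success and an errno value cast to unsigned long on failure, so the
 * BAD_ADDR() test in set_brk() catches both error returns and
 * mappings that would land at or beyond TASK_SIZE.
 */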

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
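
/*
 * Example (assuming ELF_MIN_ALIGN == 0x1000): for elf_bss == 0x0804a123,
 * nbyte == 0x1000 - 0x123 == 0xedd, so padzero() clears the tail of the
 * final partial page, i.e. user addresses 0x0804a123 through 0x0804afff.
 */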

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) & ~15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
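
/*
 * In the common grows-down case STACK_ALLOC() moves sp down by len
 * bytes and returns the new, lower address; with CONFIG_STACK_GROWSUP
 * it returns the old sp and then bumps it up.  Either way the caller
 * receives the start of a fresh len-byte region.
 */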

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif
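
	/*
	 * Sketch of the initial stack being built here, from the lowest
	 * address (where sp now points) upward:
	 *
	 *	argc
	 *	(for a.out interpreters only: pointers to argv and envp)
	 *	argv[0] ... argv[argc - 1], NULL
	 *	envp[0] ... envp[envc - 1], NULL
	 *	auxv pairs, terminated by an AT_NULL entry
	 *	(higher up: the argument/environment strings and the
	 *	 platform string copied out earlier)
	 */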

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
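
/*
 * Example: PT_LOAD entries with (p_vaddr, p_memsz) of (0x0, 0x1000) and
 * (0x5000, 0x800) give 0x5000 + 0x800 - ELF_PAGESTART(0x0) == 0x5800:
 * the span from the first segment's page base to the end of the last
 * segment, with any holes in between included.
 */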

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
	                   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
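
/*
 * With 4 KiB pages (PAGE_SHIFT == 12) the default mask is 0x7ff, so
 * randomize_stack_top() below can shift the stack base by up to
 * 0x7ff << 12 == 0x7ff000 bytes, just under the advertised 8MB of VA.
 */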

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, 0);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		static int warn;
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
			printk(KERN_WARNING "a.out ELF interpreter %s is "
				"deprecated and will not be supported "
				"after Linux 2.6.25\n", elf_interpreter);
			warn++;
		}

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, 0);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, 0);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT) {
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		} else {
			unsigned long uninitialized_var(interp_map_addr);

			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_map_addr,
						    load_bias);
			if (!IS_ERR((void *)elf_entry)) {
				/*
				 * load_elf_interp() returns relocation
				 * adjustment
				 */
				interp_load_addr = elf_entry;
				elf_entry += loc->interp_elf_ex.e_entry;
			}
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if (current->flags & PF_RANDOMIZE)
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zeroed page on error */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		goto whole;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		if (get_user(word, header) == 0 && word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}
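
/*
 * The MMF_DUMP_* bits tested by FILTER() above live in mm_flags and
 * form the per-process core dump filter (the same bits exposed to
 * userspace via /proc/<pid>/coredump_filter).
 */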

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
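
/*
 * Example (assuming the usual 12-byte struct elf_note header): a note
 * named "CORE" (5 bytes with the NUL) carrying a 20-byte descriptor
 * occupies 12 + roundup(5, 4) + roundup(20, 4) == 12 + 8 + 20 == 40
 * bytes, since both the name and the descriptor are padded to 4-byte
 * boundaries.
 */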

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
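
/*
 * saved_auxv was populated (and zero-padded) by create_elf_tables(),
 * so it always contains a terminating AT_NULL pair and the loop above
 * is bounded; the resulting note includes that terminating pair.
 */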
1521 
1522 #ifdef CORE_DUMP_USE_REGSET
1523 #include <linux/regset.h>
1524 
1525 struct elf_thread_core_info {
1526 	struct elf_thread_core_info *next;
1527 	struct task_struct *task;
1528 	struct elf_prstatus prstatus;
1529 	struct memelfnote notes[0];
1530 };
1531 
1532 struct elf_note_info {
1533 	struct elf_thread_core_info *thread;
1534 	struct memelfnote psinfo;
1535 	struct memelfnote auxv;
1536 	size_t size;
1537 	int thread_notes;
1538 };
1539 
1540 static int fill_thread_core_info(struct elf_thread_core_info *t,
1541 				 const struct user_regset_view *view,
1542 				 long signr, size_t *total)
1543 {
1544 	unsigned int i;
1545 
1546 	/*
1547 	 * NT_PRSTATUS is the one special case, because the regset data
1548 	 * goes into the pr_reg field inside the note contents, rather
1549 	 * than being the whole note contents.  We fill the reset in here.
1550 	 * We assume that regset 0 is NT_PRSTATUS.
1551 	 */
1552 	fill_prstatus(&t->prstatus, t->task, signr);
1553 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1554 				    0, sizeof(t->prstatus.pr_reg),
1555 				    &t->prstatus.pr_reg, NULL);
1556 
1557 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1558 		  sizeof(t->prstatus), &t->prstatus);
1559 	*total += notesize(&t->notes[0]);
1560 
1561 	/*
1562 	 * Each other regset might generate a note too.  For each regset
1563 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1564 	 * all zero and we'll know to skip writing it later.
1565 	 */
1566 	for (i = 1; i < view->n; ++i) {
1567 		const struct user_regset *regset = &view->regsets[i];
1568 		if (regset->core_note_type &&
1569 		    (!regset->active || regset->active(t->task, regset))) {
1570 			int ret;
1571 			size_t size = regset->n * regset->size;
1572 			void *data = kmalloc(size, GFP_KERNEL);
1573 			if (unlikely(!data))
1574 				return 0;
1575 			ret = regset->get(t->task, regset,
1576 					  0, size, data, NULL);
1577 			if (unlikely(ret))
1578 				kfree(data);
1579 			else {
1580 				if (regset->core_note_type != NT_PRFPREG)
1581 					fill_note(&t->notes[i], "LINUX",
1582 						  regset->core_note_type,
1583 						  size, data);
1584 				else {
1585 					t->prstatus.pr_fpvalid = 1;
1586 					fill_note(&t->notes[i], "CORE",
1587 						  NT_PRFPREG, size, data);
1588 				}
1589 				*total += notesize(&t->notes[i]);
1590 			}
1591 		}
1592 	}
1593 
1594 	return 1;
1595 }
1596 
1597 static int fill_note_info(struct elfhdr *elf, int phdrs,
1598 			  struct elf_note_info *info,
1599 			  long signr, struct pt_regs *regs)
1600 {
1601 	struct task_struct *dump_task = current;
1602 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1603 	struct elf_thread_core_info *t;
1604 	struct elf_prpsinfo *psinfo;
1605 	struct task_struct *g, *p;
1606 	unsigned int i;
1607 
1608 	info->size = 0;
1609 	info->thread = NULL;
1610 
1611 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1612 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1613 
1614 	if (psinfo == NULL)
1615 		return 0;
1616 
1617 	/*
1618 	 * Figure out how many notes we're going to need for each thread.
1619 	 */
1620 	info->thread_notes = 0;
1621 	for (i = 0; i < view->n; ++i)
1622 		if (view->regsets[i].core_note_type != 0)
1623 			++info->thread_notes;
1624 
1625 	/*
1626 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1627 	 * since it is our one special case.
1628 	 */
1629 	if (unlikely(info->thread_notes == 0) ||
1630 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1631 		WARN_ON(1);
1632 		return 0;
1633 	}
1634 
1635 	/*
1636 	 * Initialize the ELF file header.
1637 	 */
1638 	fill_elf_header(elf, phdrs,
1639 			view->e_machine, view->e_flags, view->ei_osabi);
1640 
1641 	/*
1642 	 * Allocate a structure for each thread.
1643 	 */
1644 	rcu_read_lock();
1645 	do_each_thread(g, p)
1646 		if (p->mm == dump_task->mm) {
1647 			t = kzalloc(offsetof(struct elf_thread_core_info,
1648 					     notes[info->thread_notes]),
1649 				    GFP_ATOMIC);
1650 			if (unlikely(!t)) {
1651 				rcu_read_unlock();
1652 				return 0;
1653 			}
1654 			t->task = p;
1655 			if (p == dump_task || !info->thread) {
1656 				t->next = info->thread;
1657 				info->thread = t;
1658 			} else {
1659 				/*
1660 				 * Make sure to keep the original task at
1661 				 * the head of the list.
1662 				 */
1663 				t->next = info->thread->next;
1664 				info->thread->next = t;
1665 			}
1666 		}
1667 	while_each_thread(g, p);
1668 	rcu_read_unlock();
1669 
1670 	/*
1671 	 * Now fill in each thread's information.
1672 	 */
1673 	for (t = info->thread; t != NULL; t = t->next)
1674 		if (!fill_thread_core_info(t, view, signr, &info->size))
1675 			return 0;
1676 
1677 	/*
1678 	 * Fill in the two process-wide notes.
1679 	 */
1680 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1681 	info->size += notesize(&info->psinfo);
1682 
1683 	fill_auxv_note(&info->auxv, current->mm);
1684 	info->size += notesize(&info->auxv);
1685 
1686 	return 1;
1687 }
1688 
1689 static size_t get_note_info_size(struct elf_note_info *info)
1690 {
1691 	return info->size;
1692 }
1693 
1694 /*
1695  * Write all the notes for each thread.  When writing the first thread, the
1696  * process-wide notes are interleaved after the first thread-specific note.
1697  */
1698 static int write_note_info(struct elf_note_info *info,
1699 			   struct file *file, loff_t *foffset)
1700 {
1701 	bool first = 1;
1702 	struct elf_thread_core_info *t = info->thread;
1703 
1704 	do {
1705 		int i;
1706 
1707 		if (!writenote(&t->notes[0], file, foffset))
1708 			return 0;
1709 
1710 		if (first && !writenote(&info->psinfo, file, foffset))
1711 			return 0;
1712 		if (first && !writenote(&info->auxv, file, foffset))
1713 			return 0;
1714 
1715 		for (i = 1; i < info->thread_notes; ++i)
1716 			if (t->notes[i].data &&
1717 			    !writenote(&t->notes[i], file, foffset))
1718 				return 0;
1719 
1720 		first = 0;
1721 		t = t->next;
1722 	} while (t);
1723 
1724 	return 1;
1725 }
1726 
1727 static void free_note_info(struct elf_note_info *info)
1728 {
1729 	struct elf_thread_core_info *threads = info->thread;
1730 	while (threads) {
1731 		unsigned int i;
1732 		struct elf_thread_core_info *t = threads;
1733 		threads = t->next;
1734 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1735 		for (i = 1; i < info->thread_notes; ++i)
1736 			kfree(t->notes[i].data);
1737 		kfree(t);
1738 	}
1739 	kfree(info->psinfo.data);
1740 }
1741 
1742 #else
1743 
1744 /* Here is the structure in which status of each thread is captured. */
1745 struct elf_thread_status
1746 {
1747 	struct list_head list;
1748 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1749 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1750 	struct task_struct *thread;
1751 #ifdef ELF_CORE_COPY_XFPREGS
1752 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1753 #endif
1754 	struct memelfnote notes[3];
1755 	int num_notes;
1756 };
1757 
1758 /*
1759  * In order to add the specific thread information for the elf file format,
1760  * we need to keep a linked list of every threads pr_status and then create
1761  * a single section for them in the final core file.
1762  */
1763 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1764 {
1765 	int sz = 0;
1766 	struct task_struct *p = t->thread;
1767 	t->num_notes = 0;
1768 
1769 	fill_prstatus(&t->prstatus, p, signr);
1770 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1771 
1772 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1773 		  &(t->prstatus));
1774 	t->num_notes++;
1775 	sz += notesize(&t->notes[0]);
1776 
1777 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1778 								&t->fpu))) {
1779 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1780 			  &(t->fpu));
1781 		t->num_notes++;
1782 		sz += notesize(&t->notes[1]);
1783 	}
1784 
1785 #ifdef ELF_CORE_COPY_XFPREGS
1786 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1787 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1788 			  sizeof(t->xfpu), &t->xfpu);
1789 		t->num_notes++;
1790 		sz += notesize(&t->notes[2]);
1791 	}
1792 #endif
1793 	return sz;
1794 }
1795 
1796 struct elf_note_info {
1797 	struct memelfnote *notes;
1798 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1799 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1800 	struct list_head thread_list;
1801 	elf_fpregset_t *fpu;
1802 #ifdef ELF_CORE_COPY_XFPREGS
1803 	elf_fpxregset_t *xfpu;
1804 #endif
1805 	int thread_status_size;
1806 	int numnote;
1807 };
1808 
1809 static int fill_note_info(struct elfhdr *elf, int phdrs,
1810 			  struct elf_note_info *info,
1811 			  long signr, struct pt_regs *regs)
1812 {
1813 #define	NUM_NOTES	6
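/* NUM_NOTES is an upper bound on the info->notes entries filled in below. */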
1814 	struct list_head *t;
1815 	struct task_struct *g, *p;
1816 
1817 	info->notes = NULL;
1818 	info->prstatus = NULL;
1819 	info->psinfo = NULL;
1820 	info->fpu = NULL;
1821 #ifdef ELF_CORE_COPY_XFPREGS
1822 	info->xfpu = NULL;
1823 #endif
1824 	INIT_LIST_HEAD(&info->thread_list);
1825 
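	/*
	 * On any allocation failure below we just return 0: the caller
	 * bails out through free_note_info(), and kfree(NULL) is a no-op
	 * for whatever was never allocated.
	 */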
1826 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1827 			      GFP_KERNEL);
1828 	if (!info->notes)
1829 		return 0;
1830 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1831 	if (!info->psinfo)
1832 		return 0;
1833 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1834 	if (!info->prstatus)
1835 		return 0;
1836 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1837 	if (!info->fpu)
1838 		return 0;
1839 #ifdef ELF_CORE_COPY_XFPREGS
1840 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1841 	if (!info->xfpu)
1842 		return 0;
1843 #endif
1844 
1845 	info->thread_status_size = 0;
1846 	if (signr) {
1847 		struct elf_thread_status *tmp;
1848 		rcu_read_lock();
1849 		do_each_thread(g, p)
1850 			if (current->mm == p->mm && current != p) {
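				/* GFP_ATOMIC: we may not sleep under rcu_read_lock() */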
1851 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1852 				if (!tmp) {
1853 					rcu_read_unlock();
1854 					return 0;
1855 				}
1856 				tmp->thread = p;
1857 				list_add(&tmp->list, &info->thread_list);
1858 			}
1859 		while_each_thread(g, p);
1860 		rcu_read_unlock();
1861 		list_for_each(t, &info->thread_list) {
1862 			struct elf_thread_status *tmp;
1863 			int sz;
1864 
1865 			tmp = list_entry(t, struct elf_thread_status, list);
1866 			sz = elf_dump_thread_status(signr, tmp);
1867 			info->thread_status_size += sz;
1868 		}
1869 	}
1870 	/* Now collect the dump for the current task. */
1871 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1872 	fill_prstatus(info->prstatus, current, signr);
1873 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1874 
1875 	/* Set up header */
1876 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1877 
1878 	/*
1879 	 * Set up the notes in similar form to SVR4 core dumps made
1880 	 * with info from their /proc.
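	 *
	 * Order matters: debuggers treat each NT_PRSTATUS note as the start
	 * of a new thread's state, so it has to come first.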
1881 	 */
1882 
1883 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1884 		  sizeof(*info->prstatus), info->prstatus);
1885 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1886 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1887 		  sizeof(*info->psinfo), info->psinfo);
1888 
1889 	info->numnote = 2;
1890 
1891 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1892 
1893 	/* Try to dump the FPU. */
1894 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1895 							       info->fpu);
1896 	if (info->prstatus->pr_fpvalid)
1897 		fill_note(info->notes + info->numnote++,
1898 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1899 #ifdef ELF_CORE_COPY_XFPREGS
1900 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1901 		fill_note(info->notes + info->numnote++,
1902 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1903 			  sizeof(*info->xfpu), info->xfpu);
1904 #endif
1905 
1906 	return 1;
1907 
1908 #undef NUM_NOTES
1909 }
1910 
1911 static size_t get_note_info_size(struct elf_note_info *info)
1912 {
1913 	int sz = 0;
1914 	int i;
1915 
1916 	for (i = 0; i < info->numnote; i++)
1917 		sz += notesize(info->notes + i);
1918 
1919 	sz += info->thread_status_size;
1920 
1921 	return sz;
1922 }
1923 
1924 static int write_note_info(struct elf_note_info *info,
1925 			   struct file *file, loff_t *foffset)
1926 {
1927 	int i;
1928 	struct list_head *t;
1929 
1930 	for (i = 0; i < info->numnote; i++)
1931 		if (!writenote(info->notes + i, file, foffset))
1932 			return 0;
1933 
1934 	/* write out the thread status notes section */
1935 	list_for_each(t, &info->thread_list) {
1936 		struct elf_thread_status *tmp =
1937 				list_entry(t, struct elf_thread_status, list);
1938 
1939 		for (i = 0; i < tmp->num_notes; i++)
1940 			if (!writenote(&tmp->notes[i], file, foffset))
1941 				return 0;
1942 	}
1943 
1944 	return 1;
1945 }
1946 
1947 static void free_note_info(struct elf_note_info *info)
1948 {
1949 	while (!list_empty(&info->thread_list)) {
1950 		struct list_head *tmp = info->thread_list.next;
1951 		list_del(tmp);
1952 		kfree(list_entry(tmp, struct elf_thread_status, list));
1953 	}
1954 
1955 	kfree(info->prstatus);
1956 	kfree(info->psinfo);
1957 	kfree(info->notes);
1958 	kfree(info->fpu);
1959 #ifdef ELF_CORE_COPY_XFPREGS
1960 	kfree(info->xfpu);
1961 #endif
1962 }
1963 
1964 #endif
1965 
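/*
 * Start the vma walk at the first real mapping if there is one, falling
 * back to the gate vma (e.g. the x86 vsyscall page) otherwise.
 */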
1966 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1967 					struct vm_area_struct *gate_vma)
1968 {
1969 	struct vm_area_struct *ret = tsk->mm->mmap;
1970 
1971 	if (ret)
1972 		return ret;
1973 	return gate_vma;
1974 }
1975 /*
1976  * Helper function for iterating across a vma list.  It ensures that the caller
1977  * will visit `gate_vma' prior to terminating the search.
1978  */
1979 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1980 					struct vm_area_struct *gate_vma)
1981 {
1982 	struct vm_area_struct *ret;
1983 
1984 	ret = this_vma->vm_next;
1985 	if (ret)
1986 		return ret;
1987 	if (this_vma == gate_vma)
1988 		return NULL;
1989 	return gate_vma;
1990 }
1991 
1992 /*
1993  * Actual dumper
1994  *
1995  * This is a two-pass process; first we find the offsets of the bits,
1996  * and then they are actually written out.  If we run out of core limit
1997  * we just truncate.
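 *
 * The resulting file layout is:
 *
 *	[ELF header][program headers][note segment][page-aligned vma data]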
1998  */
1999 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
2000 {
2001 	int has_dumped = 0;
2002 	mm_segment_t fs;
2003 	int segs;
2004 	size_t size = 0;
2005 	struct vm_area_struct *vma, *gate_vma;
2006 	struct elfhdr *elf = NULL;
2007 	loff_t offset = 0, dataoff, foffset;
2008 	unsigned long mm_flags;
2009 	struct elf_note_info info;
2010 
2011 	/*
2012 	 * We no longer stop all VM operations.
2013 	 *
2014 	 * This is because any processes that could possibly change the
2015 	 * map_count or the mmap/vma pages are now blocked in do_exit,
2016 	 * waiting for current to finish this core dump.
2017 	 *
2018 	 * Only ptrace can touch these memory addresses, but it doesn't change
2019 	 * the map_count or the pages allocated, so there is no possibility
2020 	 * of crashing while dumping the mm->vm_next areas to the core file.
2021 	 */
2022 
2023 	/* alloc memory for large data structures: too large to be on stack */
2024 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2025 	if (!elf)
2026 		return 0; /* "info" is not initialized yet; cleanup would free garbage */
2027 
2028 	segs = current->mm->map_count;
2029 #ifdef ELF_CORE_EXTRA_PHDRS
2030 	segs += ELF_CORE_EXTRA_PHDRS;
2031 #endif
2032 
2033 	gate_vma = get_gate_vma(current);
2034 	if (gate_vma != NULL)
2035 		segs++;
2036 
2037 	/*
2038 	 * Collect all the non-memory information about the process for the
2039 	 * notes.  This also sets up the file header.
2040 	 */
2041 	if (!fill_note_info(elf, segs + 1, /* including notes section */
2042 			    &info, signr, regs))
2043 		goto cleanup;
2044 
2045 	has_dumped = 1;
2046 	current->flags |= PF_DUMPCORE;
2047 
2048 	fs = get_fs();
2049 	set_fs(KERNEL_DS);
2050 
2051 	DUMP_WRITE(elf, sizeof(*elf));
2052 	offset += sizeof(*elf);				/* Elf header */
2053 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
2054 	foffset = offset;
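	/*
	 * From here on "offset" accumulates the planned file layout while
	 * "foffset" tracks the position actually written so far.
	 */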
2055 
2056 	/* Write notes phdr entry */
2057 	{
2058 		struct elf_phdr phdr;
2059 		size_t sz = get_note_info_size(&info);
2060 
2061 		sz += elf_coredump_extra_notes_size();
2062 
2063 		fill_elf_note_phdr(&phdr, sz, offset);
2064 		offset += sz;
2065 		DUMP_WRITE(&phdr, sizeof(phdr));
2066 	}
2067 
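	/*
	 * The vma contents must start on a page boundary; the gap left
	 * after the notes is skipped with DUMP_SEEK further down.
	 */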
2068 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2069 
2070 	/*
2071 	 * We must use the same mm->flags while dumping core to avoid
2072 	 * inconsistency between the program headers and bodies, otherwise an
2073 	 * unusable core file can be generated.
2074 	 */
2075 	mm_flags = current->mm->flags;
2076 
2077 	/* Write program headers for segments dump */
2078 	for (vma = first_vma(current, gate_vma); vma != NULL;
2079 			vma = next_vma(vma, gate_vma)) {
2080 		struct elf_phdr phdr;
2081 
2082 		phdr.p_type = PT_LOAD;
2083 		phdr.p_offset = offset;
2084 		phdr.p_vaddr = vma->vm_start;
2085 		phdr.p_paddr = 0;
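		/*
		 * p_filesz is how much of the vma actually lands in the file
		 * (possibly zero or partial, per the coredump filter bits in
		 * mm_flags); p_memsz is always the full mapping size.
		 */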
2086 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
2087 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2088 		offset += phdr.p_filesz;
2089 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2090 		if (vma->vm_flags & VM_WRITE)
2091 			phdr.p_flags |= PF_W;
2092 		if (vma->vm_flags & VM_EXEC)
2093 			phdr.p_flags |= PF_X;
2094 		phdr.p_align = ELF_EXEC_PAGESIZE;
2095 
2096 		DUMP_WRITE(&phdr, sizeof(phdr));
2097 	}
2098 
2099 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2100 	ELF_CORE_WRITE_EXTRA_PHDRS;
2101 #endif
2102 
2103 	/* Write out the notes section */
2104 	if (!write_note_info(&info, file, &foffset))
2105 		goto end_coredump;
2106 
2107 	if (elf_coredump_extra_notes_write(file, &foffset))
2108 		goto end_coredump;
2109 
2110 	/* Align to page */
2111 	DUMP_SEEK(dataoff - foffset);
2112 
2113 	for (vma = first_vma(current, gate_vma); vma != NULL;
2114 			vma = next_vma(vma, gate_vma)) {
2115 		unsigned long addr;
2116 		unsigned long end;
2117 
2118 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2119 
2120 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2121 			struct page *page;
2122 			struct vm_area_struct *page_vma;
2123 
2124 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2125 						&page, &page_vma) <= 0) {
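				/* Page not fetchable: skip it in the file. */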
2126 				DUMP_SEEK(PAGE_SIZE);
2127 			} else {
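				/*
				 * Untouched anonymous pages map the shared
				 * zero page; leave a hole rather than write
				 * a page of zeroes.
				 */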
2128 				if (page == ZERO_PAGE(0)) {
2129 					if (!dump_seek(file, PAGE_SIZE)) {
2130 						page_cache_release(page);
2131 						goto end_coredump;
2132 					}
2133 				} else {
2134 					void *kaddr;
2135 					flush_cache_page(page_vma, addr,
2136 							 page_to_pfn(page));
2137 					kaddr = kmap(page);
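					/* Truncate silently at the core rlimit. */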
2138 					if ((size += PAGE_SIZE) > limit ||
2139 					    !dump_write(file, kaddr,
2140 					    PAGE_SIZE)) {
2141 						kunmap(page);
2142 						page_cache_release(page);
2143 						goto end_coredump;
2144 					}
2145 					kunmap(page);
2146 				}
2147 				page_cache_release(page);
2148 			}
2149 		}
2150 	}
2151 
2152 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2153 	ELF_CORE_WRITE_EXTRA_DATA;
2154 #endif
2155 
2156 end_coredump:
2157 	set_fs(fs);
2158 
2159 cleanup:
2160 	kfree(elf);
2161 	free_note_info(&info);
2162 	return has_dumped;
2163 }
2164 
2165 #endif		/* USE_ELF_CORE_DUMP */
2166 
2167 static int __init init_elf_binfmt(void)
2168 {
2169 	return register_binfmt(&elf_format);
2170 }
2171 
2172 static void __exit exit_elf_binfmt(void)
2173 {
2174 	/* Remove the ELF loader. */
2175 	unregister_binfmt(&elf_format);
2176 }
2177 
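/*
 * core_initcall rather than module_init: presumably so the ELF loader is
 * registered before regular initcalls run and early userspace can exec.
 */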
2178 core_initcall(init_elf_binfmt);
2179 module_exit(exit_elf_binfmt);
2180 MODULE_LICENSE("GPL");
2181