xref: /openbmc/linux/fs/binfmt_elf.c (revision 3366e358)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38 
39 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
40 static int load_elf_library(struct file *);
41 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
42 				int, int, unsigned long);
43 
44 /*
45  * If we don't support core dumping, then supply a NULL so we
46  * don't even try.
47  */
48 #ifdef CONFIG_ELF_CORE
49 static int elf_core_dump(struct coredump_params *cprm);
50 #else
51 #define elf_core_dump	NULL
52 #endif
53 
54 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
55 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
56 #else
57 #define ELF_MIN_ALIGN	PAGE_SIZE
58 #endif
59 
60 #ifndef ELF_CORE_EFLAGS
61 #define ELF_CORE_EFLAGS	0
62 #endif
63 
64 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
65 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
66 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
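/*
 * A quick worked example of the rounding macros above, assuming a
 * hypothetical ELF_MIN_ALIGN of 0x1000 (4K pages):
 *
 *	ELF_PAGESTART(0x2345)  == 0x2000	(round down to page start)
 *	ELF_PAGEOFFSET(0x2345) == 0x0345	(offset within that page)
 *	ELF_PAGEALIGN(0x2345)  == 0x3000	(round up to the next page)
 */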
67 
68 static struct linux_binfmt elf_format = {
69 		.module		= THIS_MODULE,
70 		.load_binary	= load_elf_binary,
71 		.load_shlib	= load_elf_library,
72 		.core_dump	= elf_core_dump,
73 		.min_coredump	= ELF_EXEC_PAGESIZE,
74 		.hasvdso	= 1
75 };
76 
77 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
78 
79 static int set_brk(unsigned long start, unsigned long end)
80 {
81 	start = ELF_PAGEALIGN(start);
82 	end = ELF_PAGEALIGN(end);
83 	if (end > start) {
84 		unsigned long addr;
85 		down_write(&current->mm->mmap_sem);
86 		addr = do_brk(start, end - start);
87 		up_write(&current->mm->mmap_sem);
88 		if (BAD_ADDR(addr))
89 			return addr;
90 	}
91 	current->mm->start_brk = current->mm->brk = end;
92 	return 0;
93 }
94 
95 /* We need to explicitly zero any fractional pages
96    after the data section (i.e. bss).  Otherwise that
97    partial page would contain junk from the file that
98    should not be in memory.
99  */
100 static int padzero(unsigned long elf_bss)
101 {
102 	unsigned long nbyte;
103 
104 	nbyte = ELF_PAGEOFFSET(elf_bss);
105 	if (nbyte) {
106 		nbyte = ELF_MIN_ALIGN - nbyte;
107 		if (clear_user((void __user *) elf_bss, nbyte))
108 			return -EFAULT;
109 	}
110 	return 0;
111 }
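/*
 * A minimal sketch of what padzero() does, assuming ELF_MIN_ALIGN is
 * 0x1000: for elf_bss == 0x601234, ELF_PAGEOFFSET() yields 0x234, so the
 * remaining 0x1000 - 0x234 = 0xdcc bytes of that page (0x601234 up to
 * 0x601fff) are cleared with clear_user().
 */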
112 
113 /* Let's use some macros to make this stack manipulation a little clearer */
114 #ifdef CONFIG_STACK_GROWSUP
115 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
116 #define STACK_ROUND(sp, items) \
117 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
118 #define STACK_ALLOC(sp, len) ({ \
119 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
120 	old_sp; })
121 #else
122 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
123 #define STACK_ROUND(sp, items) \
124 	(((unsigned long) (sp - items)) &~ 15UL)
125 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
126 #endif
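/*
 * On a downward-growing stack (the #else branch above), with 8-byte
 * elf_addr_t: STACK_ALLOC(sp, len) simply drops sp by len bytes and
 * returns the new value, STACK_ADD(sp, items) reserves items elf_addr_t
 * slots below sp, and STACK_ROUND(sp, items) does the same but also
 * rounds down to a 16-byte boundary, e.g. 0x7ffffff0 minus 3 slots is
 * 0x7fffffd8, which rounds to 0x7fffffd0.
 */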
127 
128 #ifndef ELF_BASE_PLATFORM
129 /*
130  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
131  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
132  * will be copied to the user stack in the same manner as AT_PLATFORM.
133  */
134 #define ELF_BASE_PLATFORM NULL
135 #endif
136 
137 static int
138 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
139 		unsigned long load_addr, unsigned long interp_load_addr)
140 {
141 	unsigned long p = bprm->p;
142 	int argc = bprm->argc;
143 	int envc = bprm->envc;
144 	elf_addr_t __user *argv;
145 	elf_addr_t __user *envp;
146 	elf_addr_t __user *sp;
147 	elf_addr_t __user *u_platform;
148 	elf_addr_t __user *u_base_platform;
149 	elf_addr_t __user *u_rand_bytes;
150 	const char *k_platform = ELF_PLATFORM;
151 	const char *k_base_platform = ELF_BASE_PLATFORM;
152 	unsigned char k_rand_bytes[16];
153 	int items;
154 	elf_addr_t *elf_info;
155 	int ei_index = 0;
156 	const struct cred *cred = current_cred();
157 	struct vm_area_struct *vma;
158 
159 	/*
160 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
161 	 * evictions by the processes running on the same package. One
162 	 * thing we can do is to shuffle the initial stack for them.
163 	 */
164 
165 	p = arch_align_stack(p);
166 
167 	/*
168 	 * If this architecture has a platform capability string, copy it
169 	 * to userspace.  In some cases (Sparc), this info is impossible
170 	 * for userspace to get any other way, in others (i386) it is
171 	 * merely difficult.
172 	 */
173 	u_platform = NULL;
174 	if (k_platform) {
175 		size_t len = strlen(k_platform) + 1;
176 
177 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
178 		if (__copy_to_user(u_platform, k_platform, len))
179 			return -EFAULT;
180 	}
181 
182 	/*
183 	 * If this architecture has a "base" platform capability
184 	 * string, copy it to userspace.
185 	 */
186 	u_base_platform = NULL;
187 	if (k_base_platform) {
188 		size_t len = strlen(k_base_platform) + 1;
189 
190 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 		if (__copy_to_user(u_base_platform, k_base_platform, len))
192 			return -EFAULT;
193 	}
194 
195 	/*
196 	 * Generate 16 random bytes for userspace PRNG seeding.
197 	 */
198 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
199 	u_rand_bytes = (elf_addr_t __user *)
200 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
201 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
202 		return -EFAULT;
203 
204 	/* Create the ELF interpreter info */
205 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
206 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
207 #define NEW_AUX_ENT(id, val) \
208 	do { \
209 		elf_info[ei_index++] = id; \
210 		elf_info[ei_index++] = val; \
211 	} while (0)
212 
213 #ifdef ARCH_DLINFO
214 	/*
215 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
216 	 * AUXV.
217 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
218 	 * ARCH_DLINFO changes
219 	 */
220 	ARCH_DLINFO;
221 #endif
222 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
223 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
224 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
225 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
226 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
227 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
228 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
229 	NEW_AUX_ENT(AT_FLAGS, 0);
230 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
231 	NEW_AUX_ENT(AT_UID, cred->uid);
232 	NEW_AUX_ENT(AT_EUID, cred->euid);
233 	NEW_AUX_ENT(AT_GID, cred->gid);
234 	NEW_AUX_ENT(AT_EGID, cred->egid);
235  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
236 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
237 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
238 	if (k_platform) {
239 		NEW_AUX_ENT(AT_PLATFORM,
240 			    (elf_addr_t)(unsigned long)u_platform);
241 	}
242 	if (k_base_platform) {
243 		NEW_AUX_ENT(AT_BASE_PLATFORM,
244 			    (elf_addr_t)(unsigned long)u_base_platform);
245 	}
246 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
247 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
248 	}
249 #undef NEW_AUX_ENT
250 	/* AT_NULL is zero; clear the rest too */
251 	memset(&elf_info[ei_index], 0,
252 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
253 
254 	/* And advance past the AT_NULL entry.  */
255 	ei_index += 2;
256 
257 	sp = STACK_ADD(p, ei_index);
258 
259 	items = (argc + 1) + (envc + 1) + 1;
260 	bprm->p = STACK_ROUND(sp, items);
261 
262 	/* Point sp at the lowest address on the stack */
263 #ifdef CONFIG_STACK_GROWSUP
264 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
265 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
266 #else
267 	sp = (elf_addr_t __user *)bprm->p;
268 #endif
269 
270 
271 	/*
272 	 * Grow the stack manually; some architectures have a limit on how
273 	 * far ahead a user-space access may be in order to grow the stack.
274 	 */
275 	vma = find_extend_vma(current->mm, bprm->p);
276 	if (!vma)
277 		return -EFAULT;
278 
279 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
280 	if (__put_user(argc, sp++))
281 		return -EFAULT;
282 	argv = sp;
283 	envp = argv + argc + 1;
284 
285 	/* Populate argv and envp */
286 	p = current->mm->arg_end = current->mm->arg_start;
287 	while (argc-- > 0) {
288 		size_t len;
289 		if (__put_user((elf_addr_t)p, argv++))
290 			return -EFAULT;
291 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
292 		if (!len || len > MAX_ARG_STRLEN)
293 			return -EINVAL;
294 		p += len;
295 	}
296 	if (__put_user(0, argv))
297 		return -EFAULT;
298 	current->mm->arg_end = current->mm->env_start = p;
299 	while (envc-- > 0) {
300 		size_t len;
301 		if (__put_user((elf_addr_t)p, envp++))
302 			return -EFAULT;
303 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
304 		if (!len || len > MAX_ARG_STRLEN)
305 			return -EINVAL;
306 		p += len;
307 	}
308 	if (__put_user(0, envp))
309 		return -EFAULT;
310 	current->mm->env_end = p;
311 
312 	/* Put the elf_info on the stack in the right place.  */
313 	sp = (elf_addr_t __user *)envp + 1;
314 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
315 		return -EFAULT;
316 	return 0;
317 }
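/*
 * For reference, the initial userspace stack that create_elf_tables()
 * leaves behind looks roughly like this (lowest address first, one
 * elf_addr_t per slot):
 *
 *	argc
 *	argv[0] ... argv[argc-1]	pointers to the argument strings
 *	NULL
 *	envp[0] ... envp[envc-1]	pointers to the environment strings
 *	NULL
 *	auxv				(id, value) pairs ending with AT_NULL
 *
 * The strings themselves, along with the AT_RANDOM bytes and the (base)
 * platform strings pushed above, sit at higher addresses nearer the
 * stack top.
 */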
318 
319 #ifndef elf_map
320 
321 static unsigned long elf_map(struct file *filep, unsigned long addr,
322 		struct elf_phdr *eppnt, int prot, int type,
323 		unsigned long total_size)
324 {
325 	unsigned long map_addr;
326 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
327 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
328 	addr = ELF_PAGESTART(addr);
329 	size = ELF_PAGEALIGN(size);
330 
331 	/* mmap() will return -EINVAL if given a zero size, but a
332 	 * segment with zero filesize is perfectly valid */
333 	if (!size)
334 		return addr;
335 
336 	down_write(&current->mm->mmap_sem);
337 	/*
338 	* total_size is the size of the ELF (interpreter) image.
339 	* The _first_ mmap needs to know the full size, otherwise
340 	* randomization might put this image into an overlapping
341 	* position with the ELF binary image. (since size < total_size)
342 	* So we first map the 'big' image - and unmap the remainder at
343 	* the end. (which unmap is needed for ELF images with holes.)
344 	*/
345 	if (total_size) {
346 		total_size = ELF_PAGEALIGN(total_size);
347 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
348 		if (!BAD_ADDR(map_addr))
349 			do_munmap(current->mm, map_addr+size, total_size-size);
350 	} else
351 		map_addr = do_mmap(filep, addr, size, prot, type, off);
352 
353 	up_write(&current->mm->mmap_sem);
354 	return(map_addr);
355 }
356 
357 #endif /* !elf_map */
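/*
 * elf_map() keeps the file offset and the virtual address congruent
 * modulo ELF_MIN_ALIGN: both are rounded down by ELF_PAGEOFFSET(p_vaddr)
 * while the length is grown by the same amount.  A hypothetical example
 * with ELF_MIN_ALIGN == 0x1000: p_vaddr 0x400123, p_offset 0x123 and
 * p_filesz 0x100 map 0x1000 bytes at address 0x400000 from file offset 0.
 */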
358 
359 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
360 {
361 	int i, first_idx = -1, last_idx = -1;
362 
363 	for (i = 0; i < nr; i++) {
364 		if (cmds[i].p_type == PT_LOAD) {
365 			last_idx = i;
366 			if (first_idx == -1)
367 				first_idx = i;
368 		}
369 	}
370 	if (first_idx == -1)
371 		return 0;
372 
373 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
374 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
375 }
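/*
 * total_mapping_size() spans from the page-aligned start of the first
 * PT_LOAD segment to the in-memory end of the last one.  A hypothetical
 * pair of segments at p_vaddr 0x400000 (p_memsz 0x1000) and p_vaddr
 * 0x600e10 (p_memsz 0x250) therefore needs 0x601060 - 0x400000 =
 * 0x201060 bytes of address space.
 */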
376 
377 
378 /* This is much more generalized than the library routine read function,
379    so we keep this separate.  Technically the library read function
380    is only provided so that we can read a.out libraries that have
381    an ELF header */
382 
383 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
384 		struct file *interpreter, unsigned long *interp_map_addr,
385 		unsigned long no_base)
386 {
387 	struct elf_phdr *elf_phdata;
388 	struct elf_phdr *eppnt;
389 	unsigned long load_addr = 0;
390 	int load_addr_set = 0;
391 	unsigned long last_bss = 0, elf_bss = 0;
392 	unsigned long error = ~0UL;
393 	unsigned long total_size;
394 	int retval, i, size;
395 
396 	/* First of all, some simple consistency checks */
397 	if (interp_elf_ex->e_type != ET_EXEC &&
398 	    interp_elf_ex->e_type != ET_DYN)
399 		goto out;
400 	if (!elf_check_arch(interp_elf_ex))
401 		goto out;
402 	if (!interpreter->f_op || !interpreter->f_op->mmap)
403 		goto out;
404 
405 	/*
406 	 * If the size of this structure has changed, then punt, since
407 	 * we will be doing the wrong thing.
408 	 */
409 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
410 		goto out;
411 	if (interp_elf_ex->e_phnum < 1 ||
412 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
413 		goto out;
414 
415 	/* Now read in all of the header information */
416 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
417 	if (size > ELF_MIN_ALIGN)
418 		goto out;
419 	elf_phdata = kmalloc(size, GFP_KERNEL);
420 	if (!elf_phdata)
421 		goto out;
422 
423 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
424 			     (char *)elf_phdata,size);
425 	error = -EIO;
426 	if (retval != size) {
427 		if (retval < 0)
428 			error = retval;
429 		goto out_close;
430 	}
431 
432 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
433 	if (!total_size) {
434 		error = -EINVAL;
435 		goto out_close;
436 	}
437 
438 	eppnt = elf_phdata;
439 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
440 		if (eppnt->p_type == PT_LOAD) {
441 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
442 			int elf_prot = 0;
443 			unsigned long vaddr = 0;
444 			unsigned long k, map_addr;
445 
446 			if (eppnt->p_flags & PF_R)
447 		    		elf_prot = PROT_READ;
448 			if (eppnt->p_flags & PF_W)
449 				elf_prot |= PROT_WRITE;
450 			if (eppnt->p_flags & PF_X)
451 				elf_prot |= PROT_EXEC;
452 			vaddr = eppnt->p_vaddr;
453 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
454 				elf_type |= MAP_FIXED;
455 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
456 				load_addr = -vaddr;
457 
458 			map_addr = elf_map(interpreter, load_addr + vaddr,
459 					eppnt, elf_prot, elf_type, total_size);
460 			total_size = 0;
461 			if (!*interp_map_addr)
462 				*interp_map_addr = map_addr;
463 			error = map_addr;
464 			if (BAD_ADDR(map_addr))
465 				goto out_close;
466 
467 			if (!load_addr_set &&
468 			    interp_elf_ex->e_type == ET_DYN) {
469 				load_addr = map_addr - ELF_PAGESTART(vaddr);
470 				load_addr_set = 1;
471 			}
472 
473 			/*
474 			 * Check to see if the section's size will overflow the
475 			 * allowed task size. Note that p_filesz must always be
476 			 * <= p_memsz so it's only necessary to check p_memsz.
477 			 */
478 			k = load_addr + eppnt->p_vaddr;
479 			if (BAD_ADDR(k) ||
480 			    eppnt->p_filesz > eppnt->p_memsz ||
481 			    eppnt->p_memsz > TASK_SIZE ||
482 			    TASK_SIZE - eppnt->p_memsz < k) {
483 				error = -ENOMEM;
484 				goto out_close;
485 			}
486 
487 			/*
488 			 * Find the end of the file mapping for this phdr, and
489 			 * keep track of the largest address we see for this.
490 			 */
491 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
492 			if (k > elf_bss)
493 				elf_bss = k;
494 
495 			/*
496 			 * Do the same thing for the memory mapping - between
497 			 * elf_bss and last_bss is the bss section.
498 			 */
499 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
500 			if (k > last_bss)
501 				last_bss = k;
502 		}
503 	}
504 
505 	if (last_bss > elf_bss) {
506 		/*
507 		 * Now fill out the bss section.  First pad the last page up
508 		 * to the page boundary, and then perform a mmap to make sure
509 		 * that there are zero-mapped pages up to and including the
510 		 * last bss page.
511 		 */
512 		if (padzero(elf_bss)) {
513 			error = -EFAULT;
514 			goto out_close;
515 		}
516 
517 		/* What we have mapped so far */
518 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
519 
520 		/* Map the last of the bss segment */
521 		down_write(&current->mm->mmap_sem);
522 		error = do_brk(elf_bss, last_bss - elf_bss);
523 		up_write(&current->mm->mmap_sem);
524 		if (BAD_ADDR(error))
525 			goto out_close;
526 	}
527 
528 	error = load_addr;
529 
530 out_close:
531 	kfree(elf_phdata);
532 out:
533 	return error;
534 }
535 
536 /*
537  * These are the functions used to load ELF style executables and shared
538  * libraries.  There is no binary dependent code anywhere else.
539  */
540 
541 #define INTERPRETER_NONE 0
542 #define INTERPRETER_ELF 2
543 
544 #ifndef STACK_RND_MASK
545 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
546 #endif
547 
548 static unsigned long randomize_stack_top(unsigned long stack_top)
549 {
550 	unsigned int random_variable = 0;
551 
552 	if ((current->flags & PF_RANDOMIZE) &&
553 		!(current->personality & ADDR_NO_RANDOMIZE)) {
554 		random_variable = get_random_int() & STACK_RND_MASK;
555 		random_variable <<= PAGE_SHIFT;
556 	}
557 #ifdef CONFIG_STACK_GROWSUP
558 	return PAGE_ALIGN(stack_top) + random_variable;
559 #else
560 	return PAGE_ALIGN(stack_top) - random_variable;
561 #endif
562 }
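/*
 * With the default STACK_RND_MASK above (0x7ff pages on a 4K-page
 * system), randomize_stack_top() shifts the stack top by at most
 * 0x7ff << 12 bytes, i.e. just under 8MB of virtual address space.
 */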
563 
564 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
565 {
566 	struct file *interpreter = NULL; /* to shut gcc up */
567  	unsigned long load_addr = 0, load_bias = 0;
568 	int load_addr_set = 0;
569 	char * elf_interpreter = NULL;
570 	unsigned long error;
571 	struct elf_phdr *elf_ppnt, *elf_phdata;
572 	unsigned long elf_bss, elf_brk;
573 	int retval, i;
574 	unsigned int size;
575 	unsigned long elf_entry;
576 	unsigned long interp_load_addr = 0;
577 	unsigned long start_code, end_code, start_data, end_data;
578 	unsigned long reloc_func_desc = 0;
579 	int executable_stack = EXSTACK_DEFAULT;
580 	unsigned long def_flags = 0;
581 	struct {
582 		struct elfhdr elf_ex;
583 		struct elfhdr interp_elf_ex;
584 	} *loc;
585 
586 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
587 	if (!loc) {
588 		retval = -ENOMEM;
589 		goto out_ret;
590 	}
591 
592 	/* Get the exec-header */
593 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
594 
595 	retval = -ENOEXEC;
596 	/* First of all, some simple consistency checks */
597 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
598 		goto out;
599 
600 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
601 		goto out;
602 	if (!elf_check_arch(&loc->elf_ex))
603 		goto out;
604 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
605 		goto out;
606 
607 	/* Now read in all of the header information */
608 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
609 		goto out;
610 	if (loc->elf_ex.e_phnum < 1 ||
611 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
612 		goto out;
613 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
614 	retval = -ENOMEM;
615 	elf_phdata = kmalloc(size, GFP_KERNEL);
616 	if (!elf_phdata)
617 		goto out;
618 
619 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
620 			     (char *)elf_phdata, size);
621 	if (retval != size) {
622 		if (retval >= 0)
623 			retval = -EIO;
624 		goto out_free_ph;
625 	}
626 
627 	elf_ppnt = elf_phdata;
628 	elf_bss = 0;
629 	elf_brk = 0;
630 
631 	start_code = ~0UL;
632 	end_code = 0;
633 	start_data = 0;
634 	end_data = 0;
635 
636 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
637 		if (elf_ppnt->p_type == PT_INTERP) {
638 			/* This is the program interpreter used for
639 			 * shared libraries; read the path here, the file is
640 			 * opened and validated as an ELF binary below.
641 			 */
642 			retval = -ENOEXEC;
643 			if (elf_ppnt->p_filesz > PATH_MAX ||
644 			    elf_ppnt->p_filesz < 2)
645 				goto out_free_ph;
646 
647 			retval = -ENOMEM;
648 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
649 						  GFP_KERNEL);
650 			if (!elf_interpreter)
651 				goto out_free_ph;
652 
653 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
654 					     elf_interpreter,
655 					     elf_ppnt->p_filesz);
656 			if (retval != elf_ppnt->p_filesz) {
657 				if (retval >= 0)
658 					retval = -EIO;
659 				goto out_free_interp;
660 			}
661 			/* make sure path is NUL terminated */
662 			retval = -ENOEXEC;
663 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
664 				goto out_free_interp;
665 
666 			interpreter = open_exec(elf_interpreter);
667 			retval = PTR_ERR(interpreter);
668 			if (IS_ERR(interpreter))
669 				goto out_free_interp;
670 
671 			/*
672 			 * If the binary is not readable then enforce
673 			 * mm->dumpable = 0 regardless of the interpreter's
674 			 * permissions.
675 			 */
676 			if (file_permission(interpreter, MAY_READ) < 0)
677 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
678 
679 			retval = kernel_read(interpreter, 0, bprm->buf,
680 					     BINPRM_BUF_SIZE);
681 			if (retval != BINPRM_BUF_SIZE) {
682 				if (retval >= 0)
683 					retval = -EIO;
684 				goto out_free_dentry;
685 			}
686 
687 			/* Get the exec headers */
688 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
689 			break;
690 		}
691 		elf_ppnt++;
692 	}
693 
694 	elf_ppnt = elf_phdata;
695 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
696 		if (elf_ppnt->p_type == PT_GNU_STACK) {
697 			if (elf_ppnt->p_flags & PF_X)
698 				executable_stack = EXSTACK_ENABLE_X;
699 			else
700 				executable_stack = EXSTACK_DISABLE_X;
701 			break;
702 		}
703 
704 	/* Some simple consistency checks for the interpreter */
705 	if (elf_interpreter) {
706 		retval = -ELIBBAD;
707 		/* Not an ELF interpreter */
708 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
709 			goto out_free_dentry;
710 		/* Verify the interpreter has a valid arch */
711 		if (!elf_check_arch(&loc->interp_elf_ex))
712 			goto out_free_dentry;
713 	}
714 
715 	/* Flush all traces of the currently running executable */
716 	retval = flush_old_exec(bprm);
717 	if (retval)
718 		goto out_free_dentry;
719 
720 	/* OK, This is the point of no return */
721 	current->flags &= ~PF_FORKNOEXEC;
722 	current->mm->def_flags = def_flags;
723 
724 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
725 	   may depend on the personality.  */
726 	SET_PERSONALITY(loc->elf_ex);
727 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
728 		current->personality |= READ_IMPLIES_EXEC;
729 
730 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
731 		current->flags |= PF_RANDOMIZE;
732 
733 	setup_new_exec(bprm);
734 
735 	/* Do this so that we can load the interpreter, if need be.  We will
736 	   change some of these later */
737 	current->mm->free_area_cache = current->mm->mmap_base;
738 	current->mm->cached_hole_size = 0;
739 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
740 				 executable_stack);
741 	if (retval < 0) {
742 		send_sig(SIGKILL, current, 0);
743 		goto out_free_dentry;
744 	}
745 
746 	current->mm->start_stack = bprm->p;
747 
748 	/* Now we do a little grungy work by mmapping the ELF image into
749 	   the correct location in memory. */
750 	for(i = 0, elf_ppnt = elf_phdata;
751 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
752 		int elf_prot = 0, elf_flags;
753 		unsigned long k, vaddr;
754 
755 		if (elf_ppnt->p_type != PT_LOAD)
756 			continue;
757 
758 		if (unlikely (elf_brk > elf_bss)) {
759 			unsigned long nbyte;
760 
761 			/* There was a PT_LOAD segment with p_memsz > p_filesz
762 			   before this one. Map anonymous pages, if needed,
763 			   and clear the area.  */
764 			retval = set_brk (elf_bss + load_bias,
765 					  elf_brk + load_bias);
766 			if (retval) {
767 				send_sig(SIGKILL, current, 0);
768 				goto out_free_dentry;
769 			}
770 			nbyte = ELF_PAGEOFFSET(elf_bss);
771 			if (nbyte) {
772 				nbyte = ELF_MIN_ALIGN - nbyte;
773 				if (nbyte > elf_brk - elf_bss)
774 					nbyte = elf_brk - elf_bss;
775 				if (clear_user((void __user *)elf_bss +
776 							load_bias, nbyte)) {
777 					/*
778 					 * This bss-zeroing can fail if the ELF
779 					 * file specifies odd protections. So
780 					 * we don't check the return value
781 					 */
782 				}
783 			}
784 		}
785 
786 		if (elf_ppnt->p_flags & PF_R)
787 			elf_prot |= PROT_READ;
788 		if (elf_ppnt->p_flags & PF_W)
789 			elf_prot |= PROT_WRITE;
790 		if (elf_ppnt->p_flags & PF_X)
791 			elf_prot |= PROT_EXEC;
792 
793 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
794 
795 		vaddr = elf_ppnt->p_vaddr;
796 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
797 			elf_flags |= MAP_FIXED;
798 		} else if (loc->elf_ex.e_type == ET_DYN) {
799 			/* Try and get dynamic programs out of the way of the
800 			 * default mmap base, as well as whatever program they
801 			 * might try to exec.  This is because the brk will
802 			 * follow the loader, and is not movable.  */
803 #ifdef CONFIG_X86
804 			load_bias = 0;
805 #else
806 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
807 #endif
808 		}
809 
810 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
811 				elf_prot, elf_flags, 0);
812 		if (BAD_ADDR(error)) {
813 			send_sig(SIGKILL, current, 0);
814 			retval = IS_ERR((void *)error) ?
815 				PTR_ERR((void*)error) : -EINVAL;
816 			goto out_free_dentry;
817 		}
818 
819 		if (!load_addr_set) {
820 			load_addr_set = 1;
821 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
822 			if (loc->elf_ex.e_type == ET_DYN) {
823 				load_bias += error -
824 				             ELF_PAGESTART(load_bias + vaddr);
825 				load_addr += load_bias;
826 				reloc_func_desc = load_bias;
827 			}
828 		}
829 		k = elf_ppnt->p_vaddr;
830 		if (k < start_code)
831 			start_code = k;
832 		if (start_data < k)
833 			start_data = k;
834 
835 		/*
836 		 * Check to see if the section's size will overflow the
837 		 * allowed task size. Note that p_filesz must always be
838 		 * <= p_memsz so it is only necessary to check p_memsz.
839 		 */
840 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
841 		    elf_ppnt->p_memsz > TASK_SIZE ||
842 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
843 			/* set_brk can never work. Avoid overflows. */
844 			send_sig(SIGKILL, current, 0);
845 			retval = -EINVAL;
846 			goto out_free_dentry;
847 		}
848 
849 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
850 
851 		if (k > elf_bss)
852 			elf_bss = k;
853 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
854 			end_code = k;
855 		if (end_data < k)
856 			end_data = k;
857 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
858 		if (k > elf_brk)
859 			elf_brk = k;
860 	}
861 
862 	loc->elf_ex.e_entry += load_bias;
863 	elf_bss += load_bias;
864 	elf_brk += load_bias;
865 	start_code += load_bias;
866 	end_code += load_bias;
867 	start_data += load_bias;
868 	end_data += load_bias;
869 
870 	/* Calling set_brk effectively mmaps the pages that we need
871 	 * for the bss and break sections.  We must do this before
872 	 * mapping in the interpreter, to make sure it doesn't wind
873 	 * up getting placed where the bss needs to go.
874 	 */
875 	retval = set_brk(elf_bss, elf_brk);
876 	if (retval) {
877 		send_sig(SIGKILL, current, 0);
878 		goto out_free_dentry;
879 	}
880 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
881 		send_sig(SIGSEGV, current, 0);
882 		retval = -EFAULT; /* Nobody gets to see this, but.. */
883 		goto out_free_dentry;
884 	}
885 
886 	if (elf_interpreter) {
887 		unsigned long uninitialized_var(interp_map_addr);
888 
889 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
890 					    interpreter,
891 					    &interp_map_addr,
892 					    load_bias);
893 		if (!IS_ERR((void *)elf_entry)) {
894 			/*
895 			 * load_elf_interp() returns the relocation
896 			 * adjustment, i.e. the interpreter's load address
897 			 */
898 			interp_load_addr = elf_entry;
899 			elf_entry += loc->interp_elf_ex.e_entry;
900 		}
901 		if (BAD_ADDR(elf_entry)) {
902 			force_sig(SIGSEGV, current);
903 			retval = IS_ERR((void *)elf_entry) ?
904 					(int)elf_entry : -EINVAL;
905 			goto out_free_dentry;
906 		}
907 		reloc_func_desc = interp_load_addr;
908 
909 		allow_write_access(interpreter);
910 		fput(interpreter);
911 		kfree(elf_interpreter);
912 	} else {
913 		elf_entry = loc->elf_ex.e_entry;
914 		if (BAD_ADDR(elf_entry)) {
915 			force_sig(SIGSEGV, current);
916 			retval = -EINVAL;
917 			goto out_free_dentry;
918 		}
919 	}
920 
921 	kfree(elf_phdata);
922 
923 	set_binfmt(&elf_format);
924 
925 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
926 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
927 	if (retval < 0) {
928 		send_sig(SIGKILL, current, 0);
929 		goto out;
930 	}
931 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
932 
933 	install_exec_creds(bprm);
934 	current->flags &= ~PF_FORKNOEXEC;
935 	retval = create_elf_tables(bprm, &loc->elf_ex,
936 			  load_addr, interp_load_addr);
937 	if (retval < 0) {
938 		send_sig(SIGKILL, current, 0);
939 		goto out;
940 	}
941 	/* N.B. passed_fileno might not be initialized? */
942 	current->mm->end_code = end_code;
943 	current->mm->start_code = start_code;
944 	current->mm->start_data = start_data;
945 	current->mm->end_data = end_data;
946 	current->mm->start_stack = bprm->p;
947 
948 #ifdef arch_randomize_brk
949 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
950 		current->mm->brk = current->mm->start_brk =
951 			arch_randomize_brk(current->mm);
952 #endif
953 
954 	if (current->personality & MMAP_PAGE_ZERO) {
955 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
956 		   and some applications "depend" upon this behavior.
957 		   Since we do not have the power to recompile these, we
958 		   emulate the SVr4 behavior. Sigh. */
959 		down_write(&current->mm->mmap_sem);
960 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
961 				MAP_FIXED | MAP_PRIVATE, 0);
962 		up_write(&current->mm->mmap_sem);
963 	}
964 
965 #ifdef ELF_PLAT_INIT
966 	/*
967 	 * The ABI may specify that certain registers be set up in special
968 	 * ways (on i386 %edx is the address of a DT_FINI function, for
969 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
970 	 * that the e_entry field is the address of the function descriptor
971 	 * for the startup routine, rather than the address of the startup
972 	 * routine itself.  This macro performs whatever initialization to
973 	 * the regs structure is required as well as any relocations to the
974 	 * function descriptor entries when executing dynamically linked apps.
975 	 */
976 	ELF_PLAT_INIT(regs, reloc_func_desc);
977 #endif
978 
979 	start_thread(regs, elf_entry, bprm->p);
980 	retval = 0;
981 out:
982 	kfree(loc);
983 out_ret:
984 	return retval;
985 
986 	/* error cleanup */
987 out_free_dentry:
988 	allow_write_access(interpreter);
989 	if (interpreter)
990 		fput(interpreter);
991 out_free_interp:
992 	kfree(elf_interpreter);
993 out_free_ph:
994 	kfree(elf_phdata);
995 	goto out;
996 }
997 
998 /* This is really simpleminded and specialized - we are loading an
999    a.out library that is given an ELF header. */
1000 static int load_elf_library(struct file *file)
1001 {
1002 	struct elf_phdr *elf_phdata;
1003 	struct elf_phdr *eppnt;
1004 	unsigned long elf_bss, bss, len;
1005 	int retval, error, i, j;
1006 	struct elfhdr elf_ex;
1007 
1008 	error = -ENOEXEC;
1009 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1010 	if (retval != sizeof(elf_ex))
1011 		goto out;
1012 
1013 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1014 		goto out;
1015 
1016 	/* First of all, some simple consistency checks */
1017 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1018 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1019 		goto out;
1020 
1021 	/* Now read in all of the header information */
1022 
1023 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1024 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1025 
1026 	error = -ENOMEM;
1027 	elf_phdata = kmalloc(j, GFP_KERNEL);
1028 	if (!elf_phdata)
1029 		goto out;
1030 
1031 	eppnt = elf_phdata;
1032 	error = -ENOEXEC;
1033 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1034 	if (retval != j)
1035 		goto out_free_ph;
1036 
1037 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1038 		if ((eppnt + i)->p_type == PT_LOAD)
1039 			j++;
1040 	if (j != 1)
1041 		goto out_free_ph;
1042 
1043 	while (eppnt->p_type != PT_LOAD)
1044 		eppnt++;
1045 
1046 	/* Now use mmap to map the library into memory. */
1047 	down_write(&current->mm->mmap_sem);
1048 	error = do_mmap(file,
1049 			ELF_PAGESTART(eppnt->p_vaddr),
1050 			(eppnt->p_filesz +
1051 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1052 			PROT_READ | PROT_WRITE | PROT_EXEC,
1053 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1054 			(eppnt->p_offset -
1055 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1056 	up_write(&current->mm->mmap_sem);
1057 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1058 		goto out_free_ph;
1059 
1060 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1061 	if (padzero(elf_bss)) {
1062 		error = -EFAULT;
1063 		goto out_free_ph;
1064 	}
1065 
1066 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1067 			    ELF_MIN_ALIGN - 1);
1068 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1069 	if (bss > len) {
1070 		down_write(&current->mm->mmap_sem);
1071 		do_brk(len, bss - len);
1072 		up_write(&current->mm->mmap_sem);
1073 	}
1074 	error = 0;
1075 
1076 out_free_ph:
1077 	kfree(elf_phdata);
1078 out:
1079 	return error;
1080 }
1081 
1082 #ifdef CONFIG_ELF_CORE
1083 /*
1084  * ELF core dumper
1085  *
1086  * Modelled on fs/exec.c:aout_core_dump()
1087  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1088  */
1089 
1090 /*
1091  * Decide what to dump of a segment, part, all or none.
1092  */
1093 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1094 				   unsigned long mm_flags)
1095 {
1096 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1097 
1098 	/* The vma can be set up to tell us the answer directly.  */
1099 	if (vma->vm_flags & VM_ALWAYSDUMP)
1100 		goto whole;
1101 
1102 	/* Hugetlb memory check */
1103 	if (vma->vm_flags & VM_HUGETLB) {
1104 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1105 			goto whole;
1106 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1107 			goto whole;
1108 	}
1109 
1110 	/* Do not dump I/O mapped devices or special mappings */
1111 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1112 		return 0;
1113 
1114 	/* By default, dump shared memory if mapped from an anonymous file. */
1115 	if (vma->vm_flags & VM_SHARED) {
1116 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1117 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1118 			goto whole;
1119 		return 0;
1120 	}
1121 
1122 	/* Dump segments that have been written to.  */
1123 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1124 		goto whole;
1125 	if (vma->vm_file == NULL)
1126 		return 0;
1127 
1128 	if (FILTER(MAPPED_PRIVATE))
1129 		goto whole;
1130 
1131 	/*
1132 	 * If this looks like the beginning of a DSO or executable mapping,
1133 	 * check for an ELF header.  If we find one, dump the first page to
1134 	 * aid in determining what was mapped here.
1135 	 */
1136 	if (FILTER(ELF_HEADERS) &&
1137 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1138 		u32 __user *header = (u32 __user *) vma->vm_start;
1139 		u32 word;
1140 		mm_segment_t fs = get_fs();
1141 		/*
1142 		 * Doing it this way gets the constant folded by GCC.
1143 		 */
1144 		union {
1145 			u32 cmp;
1146 			char elfmag[SELFMAG];
1147 		} magic;
1148 		BUILD_BUG_ON(SELFMAG != sizeof word);
1149 		magic.elfmag[EI_MAG0] = ELFMAG0;
1150 		magic.elfmag[EI_MAG1] = ELFMAG1;
1151 		magic.elfmag[EI_MAG2] = ELFMAG2;
1152 		magic.elfmag[EI_MAG3] = ELFMAG3;
1153 		/*
1154 		 * Switch to the user "segment" for get_user(),
1155 		 * then put back what elf_core_dump() had in place.
1156 		 */
1157 		set_fs(USER_DS);
1158 		if (unlikely(get_user(word, header)))
1159 			word = 0;
1160 		set_fs(fs);
1161 		if (word == magic.cmp)
1162 			return PAGE_SIZE;
1163 	}
1164 
1165 #undef	FILTER
1166 
1167 	return 0;
1168 
1169 whole:
1170 	return vma->vm_end - vma->vm_start;
1171 }
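/*
 * In short, vma_dump_size() decides in this order: VM_ALWAYSDUMP vmas and
 * hugetlb vmas selected by the coredump filter are dumped whole; I/O and
 * reserved mappings are never dumped; shared and private mappings are
 * dumped whole only when the matching MMF_DUMP_* filter bit is set; and
 * otherwise at most the first page is dumped, and only if ELF_HEADERS
 * filtering is enabled and the page starts with the ELF magic.
 */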
1172 
1173 /* An ELF note in memory */
1174 struct memelfnote
1175 {
1176 	const char *name;
1177 	int type;
1178 	unsigned int datasz;
1179 	void *data;
1180 };
1181 
1182 static int notesize(struct memelfnote *en)
1183 {
1184 	int sz;
1185 
1186 	sz = sizeof(struct elf_note);
1187 	sz += roundup(strlen(en->name) + 1, 4);
1188 	sz += roundup(en->datasz, 4);
1189 
1190 	return sz;
1191 }
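/*
 * A worked example of notesize(): the note header (struct elf_note) is
 * three 32-bit words, i.e. 12 bytes; the name "CORE" takes 5 bytes
 * rounded up to 8; and a 20-byte descriptor needs no padding, giving
 * 12 + 8 + 20 = 40 bytes for the complete note record.
 */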
1192 
1193 #define DUMP_WRITE(addr, nr, foffset)	\
1194 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1195 
1196 static int alignfile(struct file *file, loff_t *foffset)
1197 {
1198 	static const char buf[4] = { 0, };
1199 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1200 	return 1;
1201 }
1202 
1203 static int writenote(struct memelfnote *men, struct file *file,
1204 			loff_t *foffset)
1205 {
1206 	struct elf_note en;
1207 	en.n_namesz = strlen(men->name) + 1;
1208 	en.n_descsz = men->datasz;
1209 	en.n_type = men->type;
1210 
1211 	DUMP_WRITE(&en, sizeof(en), foffset);
1212 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1213 	if (!alignfile(file, foffset))
1214 		return 0;
1215 	DUMP_WRITE(men->data, men->datasz, foffset);
1216 	if (!alignfile(file, foffset))
1217 		return 0;
1218 
1219 	return 1;
1220 }
1221 #undef DUMP_WRITE
1222 
1223 static void fill_elf_header(struct elfhdr *elf, int segs,
1224 			    u16 machine, u32 flags, u8 osabi)
1225 {
1226 	memset(elf, 0, sizeof(*elf));
1227 
1228 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1229 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1230 	elf->e_ident[EI_DATA] = ELF_DATA;
1231 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1232 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1233 
1234 	elf->e_type = ET_CORE;
1235 	elf->e_machine = machine;
1236 	elf->e_version = EV_CURRENT;
1237 	elf->e_phoff = sizeof(struct elfhdr);
1238 	elf->e_flags = flags;
1239 	elf->e_ehsize = sizeof(struct elfhdr);
1240 	elf->e_phentsize = sizeof(struct elf_phdr);
1241 	elf->e_phnum = segs;
1242 
1243 	return;
1244 }
1245 
1246 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1247 {
1248 	phdr->p_type = PT_NOTE;
1249 	phdr->p_offset = offset;
1250 	phdr->p_vaddr = 0;
1251 	phdr->p_paddr = 0;
1252 	phdr->p_filesz = sz;
1253 	phdr->p_memsz = 0;
1254 	phdr->p_flags = 0;
1255 	phdr->p_align = 0;
1256 	return;
1257 }
1258 
1259 static void fill_note(struct memelfnote *note, const char *name, int type,
1260 		unsigned int sz, void *data)
1261 {
1262 	note->name = name;
1263 	note->type = type;
1264 	note->datasz = sz;
1265 	note->data = data;
1266 	return;
1267 }
1268 
1269 /*
1270  * fill up all the fields in prstatus from the given task struct, except
1271  * registers which need to be filled up separately.
1272  */
1273 static void fill_prstatus(struct elf_prstatus *prstatus,
1274 		struct task_struct *p, long signr)
1275 {
1276 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1277 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1278 	prstatus->pr_sighold = p->blocked.sig[0];
1279 	rcu_read_lock();
1280 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1281 	rcu_read_unlock();
1282 	prstatus->pr_pid = task_pid_vnr(p);
1283 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1284 	prstatus->pr_sid = task_session_vnr(p);
1285 	if (thread_group_leader(p)) {
1286 		struct task_cputime cputime;
1287 
1288 		/*
1289 		 * This is the record for the group leader.  It shows the
1290 		 * group-wide total, not its individual thread total.
1291 		 */
1292 		thread_group_cputime(p, &cputime);
1293 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1294 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1295 	} else {
1296 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1297 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1298 	}
1299 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1300 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1301 }
1302 
1303 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1304 		       struct mm_struct *mm)
1305 {
1306 	const struct cred *cred;
1307 	unsigned int i, len;
1308 
1309 	/* first copy the parameters from user space */
1310 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1311 
1312 	len = mm->arg_end - mm->arg_start;
1313 	if (len >= ELF_PRARGSZ)
1314 		len = ELF_PRARGSZ-1;
1315 	if (copy_from_user(&psinfo->pr_psargs,
1316 		           (const char __user *)mm->arg_start, len))
1317 		return -EFAULT;
1318 	for(i = 0; i < len; i++)
1319 		if (psinfo->pr_psargs[i] == 0)
1320 			psinfo->pr_psargs[i] = ' ';
1321 	psinfo->pr_psargs[len] = 0;
1322 
1323 	rcu_read_lock();
1324 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1325 	rcu_read_unlock();
1326 	psinfo->pr_pid = task_pid_vnr(p);
1327 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1328 	psinfo->pr_sid = task_session_vnr(p);
1329 
1330 	i = p->state ? ffz(~p->state) + 1 : 0;
1331 	psinfo->pr_state = i;
1332 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1333 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1334 	psinfo->pr_nice = task_nice(p);
1335 	psinfo->pr_flag = p->flags;
1336 	rcu_read_lock();
1337 	cred = __task_cred(p);
1338 	SET_UID(psinfo->pr_uid, cred->uid);
1339 	SET_GID(psinfo->pr_gid, cred->gid);
1340 	rcu_read_unlock();
1341 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1342 
1343 	return 0;
1344 }
1345 
1346 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1347 {
1348 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1349 	int i = 0;
1350 	do
1351 		i += 2;
1352 	while (auxv[i - 2] != AT_NULL);
1353 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1354 }
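/*
 * saved_auxv was filled in by create_elf_tables() at exec time; the loop
 * above counts (id, value) pairs up to and including the terminating
 * AT_NULL pair, so the NT_AUXV note reproduces the auxv the process was
 * started with.
 */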
1355 
1356 #ifdef CORE_DUMP_USE_REGSET
1357 #include <linux/regset.h>
1358 
1359 struct elf_thread_core_info {
1360 	struct elf_thread_core_info *next;
1361 	struct task_struct *task;
1362 	struct elf_prstatus prstatus;
1363 	struct memelfnote notes[0];
1364 };
1365 
1366 struct elf_note_info {
1367 	struct elf_thread_core_info *thread;
1368 	struct memelfnote psinfo;
1369 	struct memelfnote auxv;
1370 	size_t size;
1371 	int thread_notes;
1372 };
1373 
1374 /*
1375  * When a regset has a writeback hook, we call it on each thread before
1376  * dumping user memory.  On register window machines, this makes sure the
1377  * user memory backing the register data is up to date before we read it.
1378  */
1379 static void do_thread_regset_writeback(struct task_struct *task,
1380 				       const struct user_regset *regset)
1381 {
1382 	if (regset->writeback)
1383 		regset->writeback(task, regset, 1);
1384 }
1385 
1386 static int fill_thread_core_info(struct elf_thread_core_info *t,
1387 				 const struct user_regset_view *view,
1388 				 long signr, size_t *total)
1389 {
1390 	unsigned int i;
1391 
1392 	/*
1393 	 * NT_PRSTATUS is the one special case, because the regset data
1394 	 * goes into the pr_reg field inside the note contents, rather
1395 	 * than being the whole note contents.  We fill the rest in here.
1396 	 * We assume that regset 0 is NT_PRSTATUS.
1397 	 */
1398 	fill_prstatus(&t->prstatus, t->task, signr);
1399 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1400 				    0, sizeof(t->prstatus.pr_reg),
1401 				    &t->prstatus.pr_reg, NULL);
1402 
1403 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1404 		  sizeof(t->prstatus), &t->prstatus);
1405 	*total += notesize(&t->notes[0]);
1406 
1407 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1408 
1409 	/*
1410 	 * Each other regset might generate a note too.  For each regset
1411 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1412 	 * all zero and we'll know to skip writing it later.
1413 	 */
1414 	for (i = 1; i < view->n; ++i) {
1415 		const struct user_regset *regset = &view->regsets[i];
1416 		do_thread_regset_writeback(t->task, regset);
1417 		if (regset->core_note_type &&
1418 		    (!regset->active || regset->active(t->task, regset))) {
1419 			int ret;
1420 			size_t size = regset->n * regset->size;
1421 			void *data = kmalloc(size, GFP_KERNEL);
1422 			if (unlikely(!data))
1423 				return 0;
1424 			ret = regset->get(t->task, regset,
1425 					  0, size, data, NULL);
1426 			if (unlikely(ret))
1427 				kfree(data);
1428 			else {
1429 				if (regset->core_note_type != NT_PRFPREG)
1430 					fill_note(&t->notes[i], "LINUX",
1431 						  regset->core_note_type,
1432 						  size, data);
1433 				else {
1434 					t->prstatus.pr_fpvalid = 1;
1435 					fill_note(&t->notes[i], "CORE",
1436 						  NT_PRFPREG, size, data);
1437 				}
1438 				*total += notesize(&t->notes[i]);
1439 			}
1440 		}
1441 	}
1442 
1443 	return 1;
1444 }
1445 
1446 static int fill_note_info(struct elfhdr *elf, int phdrs,
1447 			  struct elf_note_info *info,
1448 			  long signr, struct pt_regs *regs)
1449 {
1450 	struct task_struct *dump_task = current;
1451 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1452 	struct elf_thread_core_info *t;
1453 	struct elf_prpsinfo *psinfo;
1454 	struct core_thread *ct;
1455 	unsigned int i;
1456 
1457 	info->size = 0;
1458 	info->thread = NULL;
1459 
1460 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1461 	if (psinfo == NULL)
1462 		return 0;
1463 
1464 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1465 
1466 	/*
1467 	 * Figure out how many notes we're going to need for each thread.
1468 	 */
1469 	info->thread_notes = 0;
1470 	for (i = 0; i < view->n; ++i)
1471 		if (view->regsets[i].core_note_type != 0)
1472 			++info->thread_notes;
1473 
1474 	/*
1475 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1476 	 * since it is our one special case.
1477 	 */
1478 	if (unlikely(info->thread_notes == 0) ||
1479 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1480 		WARN_ON(1);
1481 		return 0;
1482 	}
1483 
1484 	/*
1485 	 * Initialize the ELF file header.
1486 	 */
1487 	fill_elf_header(elf, phdrs,
1488 			view->e_machine, view->e_flags, view->ei_osabi);
1489 
1490 	/*
1491 	 * Allocate a structure for each thread.
1492 	 */
1493 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1494 		t = kzalloc(offsetof(struct elf_thread_core_info,
1495 				     notes[info->thread_notes]),
1496 			    GFP_KERNEL);
1497 		if (unlikely(!t))
1498 			return 0;
1499 
1500 		t->task = ct->task;
1501 		if (ct->task == dump_task || !info->thread) {
1502 			t->next = info->thread;
1503 			info->thread = t;
1504 		} else {
1505 			/*
1506 			 * Make sure to keep the original task at
1507 			 * the head of the list.
1508 			 */
1509 			t->next = info->thread->next;
1510 			info->thread->next = t;
1511 		}
1512 	}
1513 
1514 	/*
1515 	 * Now fill in each thread's information.
1516 	 */
1517 	for (t = info->thread; t != NULL; t = t->next)
1518 		if (!fill_thread_core_info(t, view, signr, &info->size))
1519 			return 0;
1520 
1521 	/*
1522 	 * Fill in the two process-wide notes.
1523 	 */
1524 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1525 	info->size += notesize(&info->psinfo);
1526 
1527 	fill_auxv_note(&info->auxv, current->mm);
1528 	info->size += notesize(&info->auxv);
1529 
1530 	return 1;
1531 }
1532 
1533 static size_t get_note_info_size(struct elf_note_info *info)
1534 {
1535 	return info->size;
1536 }
1537 
1538 /*
1539  * Write all the notes for each thread.  When writing the first thread, the
1540  * process-wide notes are interleaved after the first thread-specific note.
1541  */
1542 static int write_note_info(struct elf_note_info *info,
1543 			   struct file *file, loff_t *foffset)
1544 {
1545 	bool first = 1;
1546 	struct elf_thread_core_info *t = info->thread;
1547 
1548 	do {
1549 		int i;
1550 
1551 		if (!writenote(&t->notes[0], file, foffset))
1552 			return 0;
1553 
1554 		if (first && !writenote(&info->psinfo, file, foffset))
1555 			return 0;
1556 		if (first && !writenote(&info->auxv, file, foffset))
1557 			return 0;
1558 
1559 		for (i = 1; i < info->thread_notes; ++i)
1560 			if (t->notes[i].data &&
1561 			    !writenote(&t->notes[i], file, foffset))
1562 				return 0;
1563 
1564 		first = 0;
1565 		t = t->next;
1566 	} while (t);
1567 
1568 	return 1;
1569 }
1570 
1571 static void free_note_info(struct elf_note_info *info)
1572 {
1573 	struct elf_thread_core_info *threads = info->thread;
1574 	while (threads) {
1575 		unsigned int i;
1576 		struct elf_thread_core_info *t = threads;
1577 		threads = t->next;
1578 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1579 		for (i = 1; i < info->thread_notes; ++i)
1580 			kfree(t->notes[i].data);
1581 		kfree(t);
1582 	}
1583 	kfree(info->psinfo.data);
1584 }
1585 
1586 #else
1587 
1588 /* Here is the structure in which the status of each thread is captured. */
1589 struct elf_thread_status
1590 {
1591 	struct list_head list;
1592 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1593 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1594 	struct task_struct *thread;
1595 #ifdef ELF_CORE_COPY_XFPREGS
1596 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1597 #endif
1598 	struct memelfnote notes[3];
1599 	int num_notes;
1600 };
1601 
1602 /*
1603  * In order to add the specific thread information for the ELF file format,
1604  * we need to keep a linked list of every thread's pr_status and then create
1605  * a single section for them in the final core file.
1606  */
1607 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1608 {
1609 	int sz = 0;
1610 	struct task_struct *p = t->thread;
1611 	t->num_notes = 0;
1612 
1613 	fill_prstatus(&t->prstatus, p, signr);
1614 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1615 
1616 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1617 		  &(t->prstatus));
1618 	t->num_notes++;
1619 	sz += notesize(&t->notes[0]);
1620 
1621 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1622 								&t->fpu))) {
1623 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1624 			  &(t->fpu));
1625 		t->num_notes++;
1626 		sz += notesize(&t->notes[1]);
1627 	}
1628 
1629 #ifdef ELF_CORE_COPY_XFPREGS
1630 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1631 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1632 			  sizeof(t->xfpu), &t->xfpu);
1633 		t->num_notes++;
1634 		sz += notesize(&t->notes[2]);
1635 	}
1636 #endif
1637 	return sz;
1638 }
1639 
1640 struct elf_note_info {
1641 	struct memelfnote *notes;
1642 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1643 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1644 	struct list_head thread_list;
1645 	elf_fpregset_t *fpu;
1646 #ifdef ELF_CORE_COPY_XFPREGS
1647 	elf_fpxregset_t *xfpu;
1648 #endif
1649 	int thread_status_size;
1650 	int numnote;
1651 };
1652 
1653 static int elf_note_info_init(struct elf_note_info *info)
1654 {
1655 	memset(info, 0, sizeof(*info));
1656 	INIT_LIST_HEAD(&info->thread_list);
1657 
1658 	/* Allocate space for six ELF notes */
1659 	info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1660 	if (!info->notes)
1661 		return 0;
1662 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1663 	if (!info->psinfo)
1664 		goto notes_free;
1665 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1666 	if (!info->prstatus)
1667 		goto psinfo_free;
1668 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1669 	if (!info->fpu)
1670 		goto prstatus_free;
1671 #ifdef ELF_CORE_COPY_XFPREGS
1672 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1673 	if (!info->xfpu)
1674 		goto fpu_free;
1675 #endif
1676 	return 1;
1677 #ifdef ELF_CORE_COPY_XFPREGS
1678  fpu_free:
1679 	kfree(info->fpu);
1680 #endif
1681  prstatus_free:
1682 	kfree(info->prstatus);
1683  psinfo_free:
1684 	kfree(info->psinfo);
1685  notes_free:
1686 	kfree(info->notes);
1687 	return 0;
1688 }
1689 
1690 static int fill_note_info(struct elfhdr *elf, int phdrs,
1691 			  struct elf_note_info *info,
1692 			  long signr, struct pt_regs *regs)
1693 {
1694 	struct list_head *t;
1695 
1696 	if (!elf_note_info_init(info))
1697 		return 0;
1698 
1699 	if (signr) {
1700 		struct core_thread *ct;
1701 		struct elf_thread_status *ets;
1702 
1703 		for (ct = current->mm->core_state->dumper.next;
1704 						ct; ct = ct->next) {
1705 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1706 			if (!ets)
1707 				return 0;
1708 
1709 			ets->thread = ct->task;
1710 			list_add(&ets->list, &info->thread_list);
1711 		}
1712 
1713 		list_for_each(t, &info->thread_list) {
1714 			int sz;
1715 
1716 			ets = list_entry(t, struct elf_thread_status, list);
1717 			sz = elf_dump_thread_status(signr, ets);
1718 			info->thread_status_size += sz;
1719 		}
1720 	}
1721 	/* now collect the dump for the current task */
1722 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1723 	fill_prstatus(info->prstatus, current, signr);
1724 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1725 
1726 	/* Set up header */
1727 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1728 
1729 	/*
1730 	 * Set up the notes in similar form to SVR4 core dumps made
1731 	 * with info from their /proc.
1732 	 */
1733 
1734 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1735 		  sizeof(*info->prstatus), info->prstatus);
1736 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1737 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1738 		  sizeof(*info->psinfo), info->psinfo);
1739 
1740 	info->numnote = 2;
1741 
1742 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1743 
1744 	/* Try to dump the FPU. */
1745 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1746 							       info->fpu);
1747 	if (info->prstatus->pr_fpvalid)
1748 		fill_note(info->notes + info->numnote++,
1749 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1750 #ifdef ELF_CORE_COPY_XFPREGS
1751 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1752 		fill_note(info->notes + info->numnote++,
1753 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1754 			  sizeof(*info->xfpu), info->xfpu);
1755 #endif
1756 
1757 	return 1;
1758 }
1759 
1760 static size_t get_note_info_size(struct elf_note_info *info)
1761 {
1762 	int sz = 0;
1763 	int i;
1764 
1765 	for (i = 0; i < info->numnote; i++)
1766 		sz += notesize(info->notes + i);
1767 
1768 	sz += info->thread_status_size;
1769 
1770 	return sz;
1771 }
1772 
1773 static int write_note_info(struct elf_note_info *info,
1774 			   struct file *file, loff_t *foffset)
1775 {
1776 	int i;
1777 	struct list_head *t;
1778 
1779 	for (i = 0; i < info->numnote; i++)
1780 		if (!writenote(info->notes + i, file, foffset))
1781 			return 0;
1782 
1783 	/* write out the thread status notes section */
1784 	list_for_each(t, &info->thread_list) {
1785 		struct elf_thread_status *tmp =
1786 				list_entry(t, struct elf_thread_status, list);
1787 
1788 		for (i = 0; i < tmp->num_notes; i++)
1789 			if (!writenote(&tmp->notes[i], file, foffset))
1790 				return 0;
1791 	}
1792 
1793 	return 1;
1794 }
1795 
1796 static void free_note_info(struct elf_note_info *info)
1797 {
1798 	while (!list_empty(&info->thread_list)) {
1799 		struct list_head *tmp = info->thread_list.next;
1800 		list_del(tmp);
1801 		kfree(list_entry(tmp, struct elf_thread_status, list));
1802 	}
1803 
1804 	kfree(info->prstatus);
1805 	kfree(info->psinfo);
1806 	kfree(info->notes);
1807 	kfree(info->fpu);
1808 #ifdef ELF_CORE_COPY_XFPREGS
1809 	kfree(info->xfpu);
1810 #endif
1811 }
1812 
1813 #endif
1814 
1815 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1816 					struct vm_area_struct *gate_vma)
1817 {
1818 	struct vm_area_struct *ret = tsk->mm->mmap;
1819 
1820 	if (ret)
1821 		return ret;
1822 	return gate_vma;
1823 }
1824 /*
1825  * Helper function for iterating across a vma list.  It ensures that the caller
1826  * will visit `gate_vma' prior to terminating the search.
1827  */
1828 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1829 					struct vm_area_struct *gate_vma)
1830 {
1831 	struct vm_area_struct *ret;
1832 
1833 	ret = this_vma->vm_next;
1834 	if (ret)
1835 		return ret;
1836 	if (this_vma == gate_vma)
1837 		return NULL;
1838 	return gate_vma;
1839 }
1840 
1841 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1842 			     elf_addr_t e_shoff, int segs)
1843 {
1844 	elf->e_shoff = e_shoff;
1845 	elf->e_shentsize = sizeof(*shdr4extnum);
1846 	elf->e_shnum = 1;
1847 	elf->e_shstrndx = SHN_UNDEF;
1848 
1849 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1850 
1851 	shdr4extnum->sh_type = SHT_NULL;
1852 	shdr4extnum->sh_size = elf->e_shnum;
1853 	shdr4extnum->sh_link = elf->e_shstrndx;
1854 	shdr4extnum->sh_info = segs;
1855 }
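/*
 * Extended program-header numbering: when the true segment count does
 * not fit in the 16-bit e_phnum field, e_phnum is set to PN_XNUM and the
 * real count is carried in sh_info of section header 0 -- the single
 * SHT_NULL header built here (e_shnum = 1, with the remaining fields
 * mirroring the ELF header per the extended-numbering convention).
 */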
1856 
1857 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1858 				     unsigned long mm_flags)
1859 {
1860 	struct vm_area_struct *vma;
1861 	size_t size = 0;
1862 
1863 	for (vma = first_vma(current, gate_vma); vma != NULL;
1864 	     vma = next_vma(vma, gate_vma))
1865 		size += vma_dump_size(vma, mm_flags);
1866 	return size;
1867 }
1868 
1869 /*
1870  * Actual dumper
1871  *
1872  * This is a two-pass process; first we find the offsets of the bits,
1873  * and then they are actually written out.  If we exceed the core file
1874  * size limit, we just truncate.
1875  */
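/*
 * Rough file layout produced by the two passes below:
 *
 *	ELF header
 *	segs program headers (PT_NOTE + one PT_LOAD per vma,
 *	    plus the gate vma and any arch extras)
 *	note data
 *	padding up to ELF_EXEC_PAGESIZE                    <- dataoff
 *	vma contents, vma_dump_size() bytes per vma
 *	arch extra data
 *	extended-numbering section header (PN_XNUM only)   <- e_shoff
 */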
1876 static int elf_core_dump(struct coredump_params *cprm)
1877 {
1878 	int has_dumped = 0;
1879 	mm_segment_t fs;
1880 	int segs;
1881 	size_t size = 0;
1882 	struct vm_area_struct *vma, *gate_vma;
1883 	struct elfhdr *elf = NULL;
1884 	loff_t offset = 0, dataoff, foffset;
1885 	struct elf_note_info info;
1886 	struct elf_phdr *phdr4note = NULL;
1887 	struct elf_shdr *shdr4extnum = NULL;
1888 	Elf_Half e_phnum;
1889 	elf_addr_t e_shoff;
1890 
1891 	/*
1892 	 * We no longer stop all VM operations.
1893 	 *
1894 	 * This is because those processes that could possibly change map_count
1895 	 * or the mmap / vma pages are now blocked in do_exit, waiting for
1896 	 * current to finish this core dump.
1897 	 *
1898 	 * Only ptrace can touch these memory addresses, but it doesn't change
1899 	 * the map_count or the pages allocated. So no possibility of crashing
1900 	 * exists while dumping the mm->vm_next areas to the core file.
1901 	 */
1902 
1903 	/* alloc memory for large data structures: too large to be on the stack */
1904 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1905 	if (!elf)
1906 		goto out;
1907 	/*
1908 	 * The number of segs is recorded in the ELF header as a 16-bit value.
1909 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
1910 	 */
1911 	segs = current->mm->map_count;
1912 	segs += elf_core_extra_phdrs();
1913 
1914 	gate_vma = get_gate_vma(current);
1915 	if (gate_vma != NULL)
1916 		segs++;
1917 
1918 	/* for notes section */
1919 	segs++;
1920 
1921 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1922 	 * this, the kernel supports extended numbering. Have a look at
1923 	 * include/linux/elf.h for further information. */
1924 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1925 
1926 	/*
1927 	 * Collect all the non-memory information about the process for the
1928 	 * notes.  This also sets up the file header.
1929 	 */
1930 	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1931 		goto cleanup;
1932 
1933 	has_dumped = 1;
1934 	current->flags |= PF_DUMPCORE;
1935 
1936 	fs = get_fs();
1937 	set_fs(KERNEL_DS);
1938 
1939 	offset += sizeof(*elf);				/* Elf header */
1940 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
1941 	foffset = offset;
1942 
1943 	/* Write notes phdr entry */
1944 	{
1945 		size_t sz = get_note_info_size(&info);
1946 
1947 		sz += elf_coredump_extra_notes_size();
1948 
1949 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1950 		if (!phdr4note)
1951 			goto end_coredump;
1952 
1953 		fill_elf_note_phdr(phdr4note, sz, offset);
1954 		offset += sz;
1955 	}
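	/*
	 * Two cursors are in play from here on: 'offset' describes the
	 * planned layout (it already covers the note data), while
	 * 'foffset' still points at the start of the notes and is
	 * advanced as they are actually written, so the dump_seek()
	 * further down can pad the file out to dataoff.
	 */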
1956 
1957 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1958 
1959 	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1960 	offset += elf_core_extra_data_size();
1961 	e_shoff = offset;
1962 
1963 	if (e_phnum == PN_XNUM) {
1964 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1965 		if (!shdr4extnum)
1966 			goto end_coredump;
1967 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1968 	}
1969 
1970 	offset = dataoff;
1971 
1972 	size += sizeof(*elf);
1973 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1974 		goto end_coredump;
1975 
1976 	size += sizeof(*phdr4note);
1977 	if (size > cprm->limit
1978 	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
1979 		goto end_coredump;
1980 
1981 	/* Write program headers for segments dump */
1982 	for (vma = first_vma(current, gate_vma); vma != NULL;
1983 			vma = next_vma(vma, gate_vma)) {
1984 		struct elf_phdr phdr;
1985 
1986 		phdr.p_type = PT_LOAD;
1987 		phdr.p_offset = offset;
1988 		phdr.p_vaddr = vma->vm_start;
1989 		phdr.p_paddr = 0;
1990 		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
1991 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1992 		offset += phdr.p_filesz;
1993 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1994 		if (vma->vm_flags & VM_WRITE)
1995 			phdr.p_flags |= PF_W;
1996 		if (vma->vm_flags & VM_EXEC)
1997 			phdr.p_flags |= PF_X;
1998 		phdr.p_align = ELF_EXEC_PAGESIZE;
1999 
2000 		size += sizeof(phdr);
2001 		if (size > cprm->limit
2002 		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2003 			goto end_coredump;
2004 	}
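	/*
	 * p_filesz may be smaller than p_memsz (possibly zero) when the
	 * coredump filter bits in cprm->mm_flags make vma_dump_size()
	 * omit part or all of the mapping; the undumped tail is simply
	 * absent from the file.
	 */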
2005 
2006 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2007 		goto end_coredump;
2008 
2009 	/* write out the notes section */
2010 	if (!write_note_info(&info, cprm->file, &foffset))
2011 		goto end_coredump;
2012 
2013 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2014 		goto end_coredump;
2015 
2016 	/* Align to page */
2017 	if (!dump_seek(cprm->file, dataoff - foffset))
2018 		goto end_coredump;
2019 
2020 	for (vma = first_vma(current, gate_vma); vma != NULL;
2021 			vma = next_vma(vma, gate_vma)) {
2022 		unsigned long addr;
2023 		unsigned long end;
2024 
2025 		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2026 
2027 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2028 			struct page *page;
2029 			int stop;
2030 
2031 			page = get_dump_page(addr);
2032 			if (page) {
2033 				void *kaddr = kmap(page);
2034 				stop = ((size += PAGE_SIZE) > cprm->limit) ||
2035 					!dump_write(cprm->file, kaddr,
2036 						    PAGE_SIZE);
2037 				kunmap(page);
2038 				page_cache_release(page);
2039 			} else
2040 				stop = !dump_seek(cprm->file, PAGE_SIZE);
2041 			if (stop)
2042 				goto end_coredump;
2043 		}
2044 	}
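	/*
	 * Pages that get_dump_page() does not return are skipped with
	 * dump_seek(), which either seeks past them or writes zeroes, so
	 * every dumped page stays at the offset its PT_LOAD header
	 * promised.
	 */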
2045 
2046 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2047 		goto end_coredump;
2048 
2049 	if (e_phnum == PN_XNUM) {
2050 		size += sizeof(*shdr4extnum);
2051 		if (size > cprm->limit
2052 		    || !dump_write(cprm->file, shdr4extnum,
2053 				   sizeof(*shdr4extnum)))
2054 			goto end_coredump;
2055 	}
2056 
2057 end_coredump:
2058 	set_fs(fs);
2059 
2060 cleanup:
2061 	free_note_info(&info);
2062 	kfree(shdr4extnum);
2063 	kfree(phdr4note);
2064 	kfree(elf);
2065 out:
2066 	return has_dumped;
2067 }
2068 
2069 #endif		/* CONFIG_ELF_CORE */
2070 
2071 static int __init init_elf_binfmt(void)
2072 {
2073 	return register_binfmt(&elf_format);
2074 }
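/*
 * register_binfmt() adds elf_format to the list of handlers that
 * search_binary_handler() tries at execve() time; registering it via
 * core_initcall() below makes ELF execution available early in boot.
 */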
2075 
2076 static void __exit exit_elf_binfmt(void)
2077 {
2078 	/* Remove the COFF and ELF loaders. */
2079 	unregister_binfmt(&elf_format);
2080 }
2081 
2082 core_initcall(init_elf_binfmt);
2083 module_exit(exit_elf_binfmt);
2084 MODULE_LICENSE("GPL");
2085