xref: /openbmc/linux/fs/binfmt_elf.c (revision 28949b84b2cb2473507ec2fed06728f995dd7942)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * linux/fs/binfmt_elf.c
4   *
5   * These are the functions used to load ELF format executables as used
6   * on SVr4 machines.  Information on the format may be found in the book
7   * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8   * Tools".
9   *
10   * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11   */
12  
13  #include <linux/module.h>
14  #include <linux/kernel.h>
15  #include <linux/fs.h>
16  #include <linux/mm.h>
17  #include <linux/mman.h>
18  #include <linux/errno.h>
19  #include <linux/signal.h>
20  #include <linux/binfmts.h>
21  #include <linux/string.h>
22  #include <linux/file.h>
23  #include <linux/slab.h>
24  #include <linux/personality.h>
25  #include <linux/elfcore.h>
26  #include <linux/init.h>
27  #include <linux/highuid.h>
28  #include <linux/compiler.h>
29  #include <linux/highmem.h>
30  #include <linux/hugetlb.h>
31  #include <linux/pagemap.h>
32  #include <linux/vmalloc.h>
33  #include <linux/security.h>
34  #include <linux/random.h>
35  #include <linux/elf.h>
36  #include <linux/elf-randomize.h>
37  #include <linux/utsname.h>
38  #include <linux/coredump.h>
39  #include <linux/sched.h>
40  #include <linux/sched/coredump.h>
41  #include <linux/sched/task_stack.h>
42  #include <linux/sched/cputime.h>
43  #include <linux/cred.h>
44  #include <linux/dax.h>
45  #include <linux/uaccess.h>
46  #include <asm/param.h>
47  #include <asm/page.h>
48  
49  #ifndef user_long_t
50  #define user_long_t long
51  #endif
52  #ifndef user_siginfo_t
53  #define user_siginfo_t siginfo_t
54  #endif
55  
56  /* That's for binfmt_elf_fdpic to deal with */
57  #ifndef elf_check_fdpic
58  #define elf_check_fdpic(ex) false
59  #endif
60  
61  static int load_elf_binary(struct linux_binprm *bprm);
62  
63  #ifdef CONFIG_USELIB
64  static int load_elf_library(struct file *);
65  #else
66  #define load_elf_library NULL
67  #endif
68  
69  /*
70   * If we don't support core dumping, then supply a NULL so we
71   * don't even try.
72   */
73  #ifdef CONFIG_ELF_CORE
74  static int elf_core_dump(struct coredump_params *cprm);
75  #else
76  #define elf_core_dump	NULL
77  #endif
78  
79  #if ELF_EXEC_PAGESIZE > PAGE_SIZE
80  #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
81  #else
82  #define ELF_MIN_ALIGN	PAGE_SIZE
83  #endif
84  
85  #ifndef ELF_CORE_EFLAGS
86  #define ELF_CORE_EFLAGS	0
87  #endif
88  
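/*
 * ELF_PAGESTART rounds an address down to the ELF load-page boundary,
 * ELF_PAGEOFFSET extracts the offset within that page, and ELF_PAGEALIGN
 * rounds up to the next boundary (ELF_MIN_ALIGN may exceed PAGE_SIZE).
 */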
89  #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
90  #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
91  #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
92  
93  static struct linux_binfmt elf_format = {
94  	.module		= THIS_MODULE,
95  	.load_binary	= load_elf_binary,
96  	.load_shlib	= load_elf_library,
97  	.core_dump	= elf_core_dump,
98  	.min_coredump	= ELF_EXEC_PAGESIZE,
99  };
100  
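/*
 * Treat any address at or beyond TASK_SIZE as invalid; this also catches
 * the negative-errno values returned by vm_mmap() and friends.
 */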
101  #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
102  
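/*
 * Extend the brk area with anonymous, zero-filled pages covering the
 * ELF-page-aligned range [start, end) and record the new program break.
 */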
103  static int set_brk(unsigned long start, unsigned long end, int prot)
104  {
105  	start = ELF_PAGEALIGN(start);
106  	end = ELF_PAGEALIGN(end);
107  	if (end > start) {
108  		/*
109  		 * Map the last of the bss segment.
110  		 * If the header is requesting these pages to be
111  		 * executable, honour that (ppc32 needs this).
112  		 */
113  		int error = vm_brk_flags(start, end - start,
114  				prot & PROT_EXEC ? VM_EXEC : 0);
115  		if (error)
116  			return error;
117  	}
118  	current->mm->start_brk = current->mm->brk = end;
119  	return 0;
120  }
121  
122  /* We need to explicitly zero any fractional pages
123     after the data section (i.e. bss).  These would
124     otherwise contain junk from the file that should
125     not be in memory.
126   */
127  static int padzero(unsigned long elf_bss)
128  {
129  	unsigned long nbyte;
130  
131  	nbyte = ELF_PAGEOFFSET(elf_bss);
132  	if (nbyte) {
133  		nbyte = ELF_MIN_ALIGN - nbyte;
134  		if (clear_user((void __user *) elf_bss, nbyte))
135  			return -EFAULT;
136  	}
137  	return 0;
138  }
139  
140  /* Let's use some macros to make this stack manipulation a little clearer */
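/* With CONFIG_STACK_GROWSUP (e.g. parisc) allocation moves sp toward higher
   addresses; on everything else the stack grows down and sp is decremented. */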
141  #ifdef CONFIG_STACK_GROWSUP
142  #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
143  #define STACK_ROUND(sp, items) \
144  	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
145  #define STACK_ALLOC(sp, len) ({ \
146  	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
147  	old_sp; })
148  #else
149  #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
150  #define STACK_ROUND(sp, items) \
151  	(((unsigned long) (sp - items)) &~ 15UL)
152  #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
153  #endif
154  
155  #ifndef ELF_BASE_PLATFORM
156  /*
157   * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
158   * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
159   * will be copied to the user stack in the same manner as AT_PLATFORM.
160   */
161  #define ELF_BASE_PLATFORM NULL
162  #endif
163  
164  static int
165  create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
166  		unsigned long load_addr, unsigned long interp_load_addr,
167  		unsigned long e_entry)
168  {
169  	struct mm_struct *mm = current->mm;
170  	unsigned long p = bprm->p;
171  	int argc = bprm->argc;
172  	int envc = bprm->envc;
173  	elf_addr_t __user *sp;
174  	elf_addr_t __user *u_platform;
175  	elf_addr_t __user *u_base_platform;
176  	elf_addr_t __user *u_rand_bytes;
177  	const char *k_platform = ELF_PLATFORM;
178  	const char *k_base_platform = ELF_BASE_PLATFORM;
179  	unsigned char k_rand_bytes[16];
180  	int items;
181  	elf_addr_t *elf_info;
182  	int ei_index;
183  	const struct cred *cred = current_cred();
184  	struct vm_area_struct *vma;
185  
186  	/*
187  	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
188  	 * evictions by the processes running on the same package. One
189  	 * thing we can do is to shuffle the initial stack for them.
190  	 */
191  
192  	p = arch_align_stack(p);
193  
194  	/*
195  	 * If this architecture has a platform capability string, copy it
196  	 * to userspace.  In some cases (Sparc), this info is impossible
197  	 * for userspace to get any other way, in others (i386) it is
198  	 * merely difficult.
199  	 */
200  	u_platform = NULL;
201  	if (k_platform) {
202  		size_t len = strlen(k_platform) + 1;
203  
204  		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
205  		if (__copy_to_user(u_platform, k_platform, len))
206  			return -EFAULT;
207  	}
208  
209  	/*
210  	 * If this architecture has a "base" platform capability
211  	 * string, copy it to userspace.
212  	 */
213  	u_base_platform = NULL;
214  	if (k_base_platform) {
215  		size_t len = strlen(k_base_platform) + 1;
216  
217  		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
218  		if (__copy_to_user(u_base_platform, k_base_platform, len))
219  			return -EFAULT;
220  	}
221  
222  	/*
223  	 * Generate 16 random bytes for userspace PRNG seeding.
224  	 */
225  	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
226  	u_rand_bytes = (elf_addr_t __user *)
227  		       STACK_ALLOC(p, sizeof(k_rand_bytes));
228  	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
229  		return -EFAULT;
230  
231  	/* Create the ELF interpreter info */
232  	elf_info = (elf_addr_t *)mm->saved_auxv;
233  	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
234  #define NEW_AUX_ENT(id, val) \
235  	do { \
236  		*elf_info++ = id; \
237  		*elf_info++ = val; \
238  	} while (0)
239  
240  #ifdef ARCH_DLINFO
241  	/*
242  	 * ARCH_DLINFO must come first so PPC can do its special alignment of
243  	 * AUXV.
244  	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
245  	 * ARCH_DLINFO changes
246  	 */
247  	ARCH_DLINFO;
248  #endif
249  	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
250  	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
251  	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
252  	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
253  	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
254  	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
255  	NEW_AUX_ENT(AT_BASE, interp_load_addr);
256  	NEW_AUX_ENT(AT_FLAGS, 0);
257  	NEW_AUX_ENT(AT_ENTRY, e_entry);
258  	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
259  	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
260  	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
261  	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
262  	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
263  	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
264  #ifdef ELF_HWCAP2
265  	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
266  #endif
267  	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
268  	if (k_platform) {
269  		NEW_AUX_ENT(AT_PLATFORM,
270  			    (elf_addr_t)(unsigned long)u_platform);
271  	}
272  	if (k_base_platform) {
273  		NEW_AUX_ENT(AT_BASE_PLATFORM,
274  			    (elf_addr_t)(unsigned long)u_base_platform);
275  	}
276  	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
277  		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
278  	}
279  #undef NEW_AUX_ENT
280  	/* AT_NULL is zero; clear the rest too */
281  	memset(elf_info, 0, (char *)mm->saved_auxv +
282  			sizeof(mm->saved_auxv) - (char *)elf_info);
283  
284  	/* And advance past the AT_NULL entry.  */
285  	elf_info += 2;
286  
287  	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
288  	sp = STACK_ADD(p, ei_index);
289  
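	/*
	 * argv pointers plus a terminating NULL, envp pointers plus a
	 * terminating NULL, and the argc slot itself.
	 */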
290  	items = (argc + 1) + (envc + 1) + 1;
291  	bprm->p = STACK_ROUND(sp, items);
292  
293  	/* Point sp at the lowest address on the stack */
294  #ifdef CONFIG_STACK_GROWSUP
295  	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
296  	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
297  #else
298  	sp = (elf_addr_t __user *)bprm->p;
299  #endif
300  
301  
302  	/*
303  	 * Grow the stack manually; some architectures have a limit on how
304  	 * far ahead a user-space access may be in order to grow the stack.
305  	 */
306  	vma = find_extend_vma(mm, bprm->p);
307  	if (!vma)
308  		return -EFAULT;
309  
310  	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
311  	if (__put_user(argc, sp++))
312  		return -EFAULT;
313  
314  	/* Populate list of argv pointers back to argv strings. */
315  	p = mm->arg_end = mm->arg_start;
316  	while (argc-- > 0) {
317  		size_t len;
318  		if (__put_user((elf_addr_t)p, sp++))
319  			return -EFAULT;
320  		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
321  		if (!len || len > MAX_ARG_STRLEN)
322  			return -EINVAL;
323  		p += len;
324  	}
325  	if (__put_user(0, sp++))
326  		return -EFAULT;
327  	mm->arg_end = p;
328  
329  	/* Populate list of envp pointers back to envp strings. */
330  	mm->env_end = mm->env_start = p;
331  	while (envc-- > 0) {
332  		size_t len;
333  		if (__put_user((elf_addr_t)p, sp++))
334  			return -EFAULT;
335  		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
336  		if (!len || len > MAX_ARG_STRLEN)
337  			return -EINVAL;
338  		p += len;
339  	}
340  	if (__put_user(0, sp++))
341  		return -EFAULT;
342  	mm->env_end = p;
343  
344  	/* Put the elf_info on the stack in the right place.  */
345  	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
346  		return -EFAULT;
347  	return 0;
348  }
349  
350  #ifndef elf_map
351  
352  static unsigned long elf_map(struct file *filep, unsigned long addr,
353  		const struct elf_phdr *eppnt, int prot, int type,
354  		unsigned long total_size)
355  {
356  	unsigned long map_addr;
357  	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
358  	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
359  	addr = ELF_PAGESTART(addr);
360  	size = ELF_PAGEALIGN(size);
361  
362  	/* mmap() will return -EINVAL if given a zero size, but a
363  	 * segment with zero filesize is perfectly valid */
364  	if (!size)
365  		return addr;
366  
367  	/*
368  	 * total_size is the size of the ELF (interpreter) image.
369  	 * The _first_ mmap needs to know the full size, otherwise
370  	 * randomization might put this image into an overlapping
371  	 * position with the ELF binary image (since size < total_size).
372  	 * So we first map the 'big' image - and unmap the remainder at
373  	 * the end (the unmap is needed for ELF images with holes).
374  	 */
375  	if (total_size) {
376  		total_size = ELF_PAGEALIGN(total_size);
377  		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
378  		if (!BAD_ADDR(map_addr))
379  			vm_munmap(map_addr+size, total_size-size);
380  	} else
381  		map_addr = vm_mmap(filep, addr, size, prot, type, off);
382  
383  	if ((type & MAP_FIXED_NOREPLACE) &&
384  	    PTR_ERR((void *)map_addr) == -EEXIST)
385  		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
386  			task_pid_nr(current), current->comm, (void *)addr);
387  
388  	return(map_addr);
389  }
390  
391  #endif /* !elf_map */
392  
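/*
 * Size of the address range spanned by all PT_LOAD segments: from the
 * ELF-page-aligned start of the first one to the end of the last one's
 * memory image.
 */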
393  static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
394  {
395  	int i, first_idx = -1, last_idx = -1;
396  
397  	for (i = 0; i < nr; i++) {
398  		if (cmds[i].p_type == PT_LOAD) {
399  			last_idx = i;
400  			if (first_idx == -1)
401  				first_idx = i;
402  		}
403  	}
404  	if (first_idx == -1)
405  		return 0;
406  
407  	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
408  				ELF_PAGESTART(cmds[first_idx].p_vaddr);
409  }
410  
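/* Read exactly @len bytes at @pos from the file; a short read is reported as -EIO. */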
411  static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
412  {
413  	ssize_t rv;
414  
415  	rv = kernel_read(file, buf, len, &pos);
416  	if (unlikely(rv != len)) {
417  		return (rv < 0) ? rv : -EIO;
418  	}
419  	return 0;
420  }
421  
422  /**
423   * load_elf_phdrs() - load ELF program headers
424   * @elf_ex:   ELF header of the binary whose program headers should be loaded
425   * @elf_file: the opened ELF binary file
426   *
427   * Loads ELF program headers from the binary file elf_file, which has the ELF
428   * header pointed to by elf_ex, into a newly allocated array. The caller is
429   * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
430   */
431  static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
432  				       struct file *elf_file)
433  {
434  	struct elf_phdr *elf_phdata = NULL;
435  	int retval, err = -1;
436  	unsigned int size;
437  
438  	/*
439  	 * If the size of this structure has changed, then punt, since
440  	 * we will be doing the wrong thing.
441  	 */
442  	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
443  		goto out;
444  
445  	/* Sanity check the number of program headers... */
446  	/* ...and their total size. */
447  	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
448  	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
449  		goto out;
450  
451  	elf_phdata = kmalloc(size, GFP_KERNEL);
452  	if (!elf_phdata)
453  		goto out;
454  
455  	/* Read in the program headers */
456  	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
457  	if (retval < 0) {
458  		err = retval;
459  		goto out;
460  	}
461  
462  	/* Success! */
463  	err = 0;
464  out:
465  	if (err) {
466  		kfree(elf_phdata);
467  		elf_phdata = NULL;
468  	}
469  	return elf_phdata;
470  }
471  
472  #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
473  
474  /**
475   * struct arch_elf_state - arch-specific ELF loading state
476   *
477   * This structure is used to preserve architecture specific data during
478   * the loading of an ELF file, throughout the checking of architecture
479   * specific ELF headers & through to the point where the ELF load is
480   * known to be proceeding (i.e. SET_PERSONALITY).
481   *
482   * This implementation is a dummy for architectures which require no
483   * specific state.
484   */
485  struct arch_elf_state {
486  };
487  
488  #define INIT_ARCH_ELF_STATE {}
489  
490  /**
491   * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
492   * @ehdr:	The main ELF header
493   * @phdr:	The program header to check
494   * @elf:	The open ELF file
495   * @is_interp:	True if the phdr is from the interpreter of the ELF being
496   *		loaded, else false.
497   * @state:	Architecture-specific state preserved throughout the process
498   *		of loading the ELF.
499   *
500   * Inspects the program header phdr to validate its correctness and/or
501   * suitability for the system. Called once per ELF program header in the
502   * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
503   * interpreter.
504   *
505   * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
506   *         with that return code.
507   */
508  static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
509  				   struct elf_phdr *phdr,
510  				   struct file *elf, bool is_interp,
511  				   struct arch_elf_state *state)
512  {
513  	/* Dummy implementation, always proceed */
514  	return 0;
515  }
516  
517  /**
518   * arch_check_elf() - check an ELF executable
519   * @ehdr:	The main ELF header
520   * @has_interp:	True if the ELF has an interpreter, else false.
521   * @interp_ehdr: The interpreter's ELF header
522   * @state:	Architecture-specific state preserved throughout the process
523   *		of loading the ELF.
524   *
525   * Provides a final opportunity for architecture code to reject the loading
526   * of the ELF & cause an exec syscall to return an error. This is called after
527   * all program headers to be checked by arch_elf_pt_proc have been checked.
528   *
529   * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
530   *         with that return code.
531   */
532  static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
533  				 struct elfhdr *interp_ehdr,
534  				 struct arch_elf_state *state)
535  {
536  	/* Dummy implementation, always proceed */
537  	return 0;
538  }
539  
540  #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
541  
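/* Translate ELF segment flags (PF_R/PF_W/PF_X) into mmap protection bits. */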
542  static inline int make_prot(u32 p_flags)
543  {
544  	int prot = 0;
545  
546  	if (p_flags & PF_R)
547  		prot |= PROT_READ;
548  	if (p_flags & PF_W)
549  		prot |= PROT_WRITE;
550  	if (p_flags & PF_X)
551  		prot |= PROT_EXEC;
552  	return prot;
553  }
554  
555  /* This is much more generalized than the library routine read function,
556     so we keep this separate.  Technically the library read function
557     is only provided so that we can read a.out libraries that have
558     an ELF header */
559  
560  static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
561  		struct file *interpreter,
562  		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
563  {
564  	struct elf_phdr *eppnt;
565  	unsigned long load_addr = 0;
566  	int load_addr_set = 0;
567  	unsigned long last_bss = 0, elf_bss = 0;
568  	int bss_prot = 0;
569  	unsigned long error = ~0UL;
570  	unsigned long total_size;
571  	int i;
572  
573  	/* First of all, some simple consistency checks */
574  	if (interp_elf_ex->e_type != ET_EXEC &&
575  	    interp_elf_ex->e_type != ET_DYN)
576  		goto out;
577  	if (!elf_check_arch(interp_elf_ex) ||
578  	    elf_check_fdpic(interp_elf_ex))
579  		goto out;
580  	if (!interpreter->f_op->mmap)
581  		goto out;
582  
583  	total_size = total_mapping_size(interp_elf_phdata,
584  					interp_elf_ex->e_phnum);
585  	if (!total_size) {
586  		error = -EINVAL;
587  		goto out;
588  	}
589  
590  	eppnt = interp_elf_phdata;
591  	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
592  		if (eppnt->p_type == PT_LOAD) {
593  			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
594  			int elf_prot = make_prot(eppnt->p_flags);
595  			unsigned long vaddr = 0;
596  			unsigned long k, map_addr;
597  
598  			vaddr = eppnt->p_vaddr;
599  			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
600  				elf_type |= MAP_FIXED_NOREPLACE;
601  			else if (no_base && interp_elf_ex->e_type == ET_DYN)
602  				load_addr = -vaddr;
603  
604  			map_addr = elf_map(interpreter, load_addr + vaddr,
605  					eppnt, elf_prot, elf_type, total_size);
606  			total_size = 0;
607  			error = map_addr;
608  			if (BAD_ADDR(map_addr))
609  				goto out;
610  
611  			if (!load_addr_set &&
612  			    interp_elf_ex->e_type == ET_DYN) {
613  				load_addr = map_addr - ELF_PAGESTART(vaddr);
614  				load_addr_set = 1;
615  			}
616  
617  			/*
618  			 * Check to see if the section's size will overflow the
619  			 * allowed task size. Note that p_filesz must always be
620  			 * <= p_memsz so it's only necessary to check p_memsz.
621  			 */
622  			k = load_addr + eppnt->p_vaddr;
623  			if (BAD_ADDR(k) ||
624  			    eppnt->p_filesz > eppnt->p_memsz ||
625  			    eppnt->p_memsz > TASK_SIZE ||
626  			    TASK_SIZE - eppnt->p_memsz < k) {
627  				error = -ENOMEM;
628  				goto out;
629  			}
630  
631  			/*
632  			 * Find the end of the file mapping for this phdr, and
633  			 * keep track of the largest address we see for this.
634  			 */
635  			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
636  			if (k > elf_bss)
637  				elf_bss = k;
638  
639  			/*
640  			 * Do the same thing for the memory mapping - between
641  			 * elf_bss and last_bss is the bss section.
642  			 */
643  			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
644  			if (k > last_bss) {
645  				last_bss = k;
646  				bss_prot = elf_prot;
647  			}
648  		}
649  	}
650  
651  	/*
652  	 * Now fill out the bss section: first pad the last page from
653  	 * the file up to the page boundary, and zero it from elf_bss
654  	 * up to the end of the page.
655  	 */
656  	if (padzero(elf_bss)) {
657  		error = -EFAULT;
658  		goto out;
659  	}
660  	/*
661  	 * Next, align both the file and mem bss up to the page size,
662  	 * since this is where elf_bss was just zeroed up to, and where
663  	 * last_bss will end after the vm_brk_flags() below.
664  	 */
665  	elf_bss = ELF_PAGEALIGN(elf_bss);
666  	last_bss = ELF_PAGEALIGN(last_bss);
667  	/* Finally, if there is still more bss to allocate, do it. */
668  	if (last_bss > elf_bss) {
669  		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
670  				bss_prot & PROT_EXEC ? VM_EXEC : 0);
671  		if (error)
672  			goto out;
673  	}
674  
675  	error = load_addr;
676  out:
677  	return error;
678  }
679  
680  /*
681   * These are the functions used to load ELF style executables and shared
682   * libraries.  There is no binary dependent code anywhere else.
683   */
684  
685  static int load_elf_binary(struct linux_binprm *bprm)
686  {
687  	struct file *interpreter = NULL; /* to shut gcc up */
688  	unsigned long load_addr = 0, load_bias = 0;
689  	int load_addr_set = 0;
690  	unsigned long error;
691  	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
692  	unsigned long elf_bss, elf_brk;
693  	int bss_prot = 0;
694  	int retval, i;
695  	unsigned long elf_entry;
696  	unsigned long e_entry;
697  	unsigned long interp_load_addr = 0;
698  	unsigned long start_code, end_code, start_data, end_data;
699  	unsigned long reloc_func_desc __maybe_unused = 0;
700  	int executable_stack = EXSTACK_DEFAULT;
701  	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
702  	struct elfhdr *interp_elf_ex = NULL;
703  	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
704  	struct mm_struct *mm;
705  	struct pt_regs *regs;
706  
707  	retval = -ENOEXEC;
708  	/* First of all, some simple consistency checks */
709  	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
710  		goto out;
711  
712  	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
713  		goto out;
714  	if (!elf_check_arch(elf_ex))
715  		goto out;
716  	if (elf_check_fdpic(elf_ex))
717  		goto out;
718  	if (!bprm->file->f_op->mmap)
719  		goto out;
720  
721  	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
722  	if (!elf_phdata)
723  		goto out;
724  
725  	elf_ppnt = elf_phdata;
726  	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
727  		char *elf_interpreter;
728  
729  		if (elf_ppnt->p_type != PT_INTERP)
730  			continue;
731  
732  		/*
733  		 * This is the program interpreter used for shared libraries -
734  		 * for now assume that this is an a.out format binary.
735  		 */
736  		retval = -ENOEXEC;
737  		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
738  			goto out_free_ph;
739  
740  		retval = -ENOMEM;
741  		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
742  		if (!elf_interpreter)
743  			goto out_free_ph;
744  
745  		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
746  				  elf_ppnt->p_offset);
747  		if (retval < 0)
748  			goto out_free_interp;
749  		/* make sure path is NUL terminated */
750  		retval = -ENOEXEC;
751  		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
752  			goto out_free_interp;
753  
754  		interpreter = open_exec(elf_interpreter);
755  		kfree(elf_interpreter);
756  		retval = PTR_ERR(interpreter);
757  		if (IS_ERR(interpreter))
758  			goto out_free_ph;
759  
760  		/*
761  		 * If the binary is not readable then enforce mm->dumpable = 0
762  		 * regardless of the interpreter's permissions.
763  		 */
764  		would_dump(bprm, interpreter);
765  
766  		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
767  		if (!interp_elf_ex) {
768  			retval = -ENOMEM;
769  			goto out_free_ph;
770  		}
771  
772  		/* Get the exec headers */
773  		retval = elf_read(interpreter, interp_elf_ex,
774  				  sizeof(*interp_elf_ex), 0);
775  		if (retval < 0)
776  			goto out_free_dentry;
777  
778  		break;
779  
780  out_free_interp:
781  		kfree(elf_interpreter);
782  		goto out_free_ph;
783  	}
784  
785  	elf_ppnt = elf_phdata;
786  	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
787  		switch (elf_ppnt->p_type) {
788  		case PT_GNU_STACK:
789  			if (elf_ppnt->p_flags & PF_X)
790  				executable_stack = EXSTACK_ENABLE_X;
791  			else
792  				executable_stack = EXSTACK_DISABLE_X;
793  			break;
794  
795  		case PT_LOPROC ... PT_HIPROC:
796  			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
797  						  bprm->file, false,
798  						  &arch_state);
799  			if (retval)
800  				goto out_free_dentry;
801  			break;
802  		}
803  
804  	/* Some simple consistency checks for the interpreter */
805  	if (interpreter) {
806  		retval = -ELIBBAD;
807  		/* Not an ELF interpreter */
808  		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
809  			goto out_free_dentry;
810  		/* Verify the interpreter has a valid arch */
811  		if (!elf_check_arch(interp_elf_ex) ||
812  		    elf_check_fdpic(interp_elf_ex))
813  			goto out_free_dentry;
814  
815  		/* Load the interpreter program headers */
816  		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
817  						   interpreter);
818  		if (!interp_elf_phdata)
819  			goto out_free_dentry;
820  
821  		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
822  		elf_ppnt = interp_elf_phdata;
823  		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
824  			switch (elf_ppnt->p_type) {
825  			case PT_LOPROC ... PT_HIPROC:
826  				retval = arch_elf_pt_proc(interp_elf_ex,
827  							  elf_ppnt, interpreter,
828  							  true, &arch_state);
829  				if (retval)
830  					goto out_free_dentry;
831  				break;
832  			}
833  	}
834  
835  	/*
836  	 * Allow arch code to reject the ELF at this point, whilst it's
837  	 * still possible to return an error to the code that invoked
838  	 * the exec syscall.
839  	 */
840  	retval = arch_check_elf(elf_ex,
841  				!!interpreter, interp_elf_ex,
842  				&arch_state);
843  	if (retval)
844  		goto out_free_dentry;
845  
846  	/* Flush all traces of the currently running executable */
847  	retval = flush_old_exec(bprm);
848  	if (retval)
849  		goto out_free_dentry;
850  
851  	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
852  	   may depend on the personality.  */
853  	SET_PERSONALITY2(*elf_ex, &arch_state);
854  	if (elf_read_implies_exec(*elf_ex, executable_stack))
855  		current->personality |= READ_IMPLIES_EXEC;
856  
857  	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
858  		current->flags |= PF_RANDOMIZE;
859  
860  	setup_new_exec(bprm);
861  	install_exec_creds(bprm);
862  
863  	/* Do this so that we can load the interpreter, if need be.  We will
864  	   change some of these later */
865  	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
866  				 executable_stack);
867  	if (retval < 0)
868  		goto out_free_dentry;
869  
870  	elf_bss = 0;
871  	elf_brk = 0;
872  
873  	start_code = ~0UL;
874  	end_code = 0;
875  	start_data = 0;
876  	end_data = 0;
877  
878  	/* Now we do a little grungy work by mmapping the ELF image into
879  	   the correct location in memory. */
880  	for(i = 0, elf_ppnt = elf_phdata;
881  	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
882  		int elf_prot, elf_flags;
883  		unsigned long k, vaddr;
884  		unsigned long total_size = 0;
885  
886  		if (elf_ppnt->p_type != PT_LOAD)
887  			continue;
888  
889  		if (unlikely (elf_brk > elf_bss)) {
890  			unsigned long nbyte;
891  
892  			/* There was a PT_LOAD segment with p_memsz > p_filesz
893  			   before this one. Map anonymous pages, if needed,
894  			   and clear the area.  */
895  			retval = set_brk(elf_bss + load_bias,
896  					 elf_brk + load_bias,
897  					 bss_prot);
898  			if (retval)
899  				goto out_free_dentry;
900  			nbyte = ELF_PAGEOFFSET(elf_bss);
901  			if (nbyte) {
902  				nbyte = ELF_MIN_ALIGN - nbyte;
903  				if (nbyte > elf_brk - elf_bss)
904  					nbyte = elf_brk - elf_bss;
905  				if (clear_user((void __user *)elf_bss +
906  							load_bias, nbyte)) {
907  					/*
908  					 * This bss-zeroing can fail if the ELF
909  					 * file specifies odd protections. So
910  					 * we don't check the return value
911  					 */
912  				}
913  			}
914  		}
915  
916  		elf_prot = make_prot(elf_ppnt->p_flags);
917  
918  		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
919  
920  		vaddr = elf_ppnt->p_vaddr;
921  		/*
922  		 * If we are loading ET_EXEC or we have already performed
923  		 * the ET_DYN load_addr calculations, proceed normally.
924  		 */
925  		if (elf_ex->e_type == ET_EXEC || load_addr_set) {
926  			elf_flags |= MAP_FIXED;
927  		} else if (elf_ex->e_type == ET_DYN) {
928  			/*
929  			 * This logic is run once for the first LOAD Program
930  			 * Header for ET_DYN binaries to calculate the
931  			 * randomization (load_bias) for all the LOAD
932  			 * Program Headers, and to calculate the entire
933  			 * size of the ELF mapping (total_size). (Note that
934  			 * load_addr_set is set to true later once the
935  			 * initial mapping is performed.)
936  			 *
937  			 * There are effectively two types of ET_DYN
938  			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
939  			 * and loaders (ET_DYN without INTERP, since they
940  			 * _are_ the ELF interpreter). The loaders must
941  			 * be loaded away from programs since the program
942  			 * may otherwise collide with the loader (especially
943  			 * for ET_EXEC which does not have a randomized
944  			 * position). For example, to handle invocations of
945  			 * "./ld.so someprog" to test out a new version of
946  			 * the loader, the subsequent program that the
947  			 * loader loads must avoid the loader itself, so
948  			 * they cannot share the same load range. Sufficient
949  			 * room for the brk must be allocated with the
950  			 * loader as well, since brk must be available with
951  			 * the loader.
952  			 *
953  			 * Therefore, programs are loaded offset from
954  			 * ELF_ET_DYN_BASE and loaders are loaded into the
955  			 * independently randomized mmap region (0 load_bias
956  			 * without MAP_FIXED).
957  			 */
958  			if (interpreter) {
959  				load_bias = ELF_ET_DYN_BASE;
960  				if (current->flags & PF_RANDOMIZE)
961  					load_bias += arch_mmap_rnd();
962  				elf_flags |= MAP_FIXED;
963  			} else
964  				load_bias = 0;
965  
966  			/*
967  			 * Since load_bias is used for all subsequent loading
968  			 * calculations, we must lower it by the first vaddr
969  			 * so that the remaining calculations based on the
970  			 * ELF vaddrs will be correctly offset. The result
971  			 * is then page aligned.
972  			 */
973  			load_bias = ELF_PAGESTART(load_bias - vaddr);
974  
975  			total_size = total_mapping_size(elf_phdata,
976  							elf_ex->e_phnum);
977  			if (!total_size) {
978  				retval = -EINVAL;
979  				goto out_free_dentry;
980  			}
981  		}
982  
983  		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
984  				elf_prot, elf_flags, total_size);
985  		if (BAD_ADDR(error)) {
986  			retval = IS_ERR((void *)error) ?
987  				PTR_ERR((void*)error) : -EINVAL;
988  			goto out_free_dentry;
989  		}
990  
991  		if (!load_addr_set) {
992  			load_addr_set = 1;
993  			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
994  			if (elf_ex->e_type == ET_DYN) {
995  				load_bias += error -
996  				             ELF_PAGESTART(load_bias + vaddr);
997  				load_addr += load_bias;
998  				reloc_func_desc = load_bias;
999  			}
1000  		}
1001  		k = elf_ppnt->p_vaddr;
1002  		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1003  			start_code = k;
1004  		if (start_data < k)
1005  			start_data = k;
1006  
1007  		/*
1008  		 * Check to see if the section's size will overflow the
1009  		 * allowed task size. Note that p_filesz must always be
1010  		 * <= p_memsz so it is only necessary to check p_memsz.
1011  		 */
1012  		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1013  		    elf_ppnt->p_memsz > TASK_SIZE ||
1014  		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1015  			/* set_brk can never work. Avoid overflows. */
1016  			retval = -EINVAL;
1017  			goto out_free_dentry;
1018  		}
1019  
1020  		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1021  
1022  		if (k > elf_bss)
1023  			elf_bss = k;
1024  		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1025  			end_code = k;
1026  		if (end_data < k)
1027  			end_data = k;
1028  		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1029  		if (k > elf_brk) {
1030  			bss_prot = elf_prot;
1031  			elf_brk = k;
1032  		}
1033  	}
1034  
1035  	e_entry = elf_ex->e_entry + load_bias;
1036  	elf_bss += load_bias;
1037  	elf_brk += load_bias;
1038  	start_code += load_bias;
1039  	end_code += load_bias;
1040  	start_data += load_bias;
1041  	end_data += load_bias;
1042  
1043  	/* Calling set_brk effectively mmaps the pages that we need
1044  	 * for the bss and break sections.  We must do this before
1045  	 * mapping in the interpreter, to make sure it doesn't wind
1046  	 * up getting placed where the bss needs to go.
1047  	 */
1048  	retval = set_brk(elf_bss, elf_brk, bss_prot);
1049  	if (retval)
1050  		goto out_free_dentry;
1051  	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1052  		retval = -EFAULT; /* Nobody gets to see this, but.. */
1053  		goto out_free_dentry;
1054  	}
1055  
1056  	if (interpreter) {
1057  		elf_entry = load_elf_interp(interp_elf_ex,
1058  					    interpreter,
1059  					    load_bias, interp_elf_phdata);
1060  		if (!IS_ERR((void *)elf_entry)) {
1061  			/*
1062  			 * load_elf_interp() returns relocation
1063  			 * adjustment
1064  			 */
1065  			interp_load_addr = elf_entry;
1066  			elf_entry += interp_elf_ex->e_entry;
1067  		}
1068  		if (BAD_ADDR(elf_entry)) {
1069  			retval = IS_ERR((void *)elf_entry) ?
1070  					(int)elf_entry : -EINVAL;
1071  			goto out_free_dentry;
1072  		}
1073  		reloc_func_desc = interp_load_addr;
1074  
1075  		allow_write_access(interpreter);
1076  		fput(interpreter);
1077  
1078  		kfree(interp_elf_ex);
1079  		kfree(interp_elf_phdata);
1080  	} else {
1081  		elf_entry = e_entry;
1082  		if (BAD_ADDR(elf_entry)) {
1083  			retval = -EINVAL;
1084  			goto out_free_dentry;
1085  		}
1086  	}
1087  
1088  	kfree(elf_phdata);
1089  
1090  	set_binfmt(&elf_format);
1091  
1092  #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1093  	retval = arch_setup_additional_pages(bprm, !!interpreter);
1094  	if (retval < 0)
1095  		goto out;
1096  #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1097  
1098  	retval = create_elf_tables(bprm, elf_ex,
1099  			  load_addr, interp_load_addr, e_entry);
1100  	if (retval < 0)
1101  		goto out;
1102  
1103  	mm = current->mm;
1104  	mm->end_code = end_code;
1105  	mm->start_code = start_code;
1106  	mm->start_data = start_data;
1107  	mm->end_data = end_data;
1108  	mm->start_stack = bprm->p;
1109  
1110  	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1111  		/*
1112  		 * For architectures with ELF randomization, when executing
1113  		 * a loader directly (i.e. no interpreter listed in ELF
1114  		 * headers), move the brk area out of the mmap region
1115  		 * (since it grows up, and may collide early with the stack
1116  		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1117  		 */
1118  		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1119  		    elf_ex->e_type == ET_DYN && !interpreter) {
1120  			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1121  		}
1122  
1123  		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1124  #ifdef compat_brk_randomized
1125  		current->brk_randomized = 1;
1126  #endif
1127  	}
1128  
1129  	if (current->personality & MMAP_PAGE_ZERO) {
1130  		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1131  		   and some applications "depend" upon this behavior.
1132  		   Since we do not have the power to recompile these, we
1133  		   emulate the SVr4 behavior. Sigh. */
1134  		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1135  				MAP_FIXED | MAP_PRIVATE, 0);
1136  	}
1137  
1138  	regs = current_pt_regs();
1139  #ifdef ELF_PLAT_INIT
1140  	/*
1141  	 * The ABI may specify that certain registers be set up in special
1142  	 * ways (on i386 %edx is the address of a DT_FINI function, for
1143  	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1144  	 * that the e_entry field is the address of the function descriptor
1145  	 * for the startup routine, rather than the address of the startup
1146  	 * routine itself.  This macro performs whatever initialization to
1147  	 * the regs structure is required as well as any relocations to the
1148  	 * function descriptor entries when executing dynamically linked apps.
1149  	 */
1150  	ELF_PLAT_INIT(regs, reloc_func_desc);
1151  #endif
1152  
1153  	finalize_exec(bprm);
1154  	start_thread(regs, elf_entry, bprm->p);
1155  	retval = 0;
1156  out:
1157  	return retval;
1158  
1159  	/* error cleanup */
1160  out_free_dentry:
1161  	kfree(interp_elf_ex);
1162  	kfree(interp_elf_phdata);
1163  	allow_write_access(interpreter);
1164  	if (interpreter)
1165  		fput(interpreter);
1166  out_free_ph:
1167  	kfree(elf_phdata);
1168  	goto out;
1169  }
1170  
1171  #ifdef CONFIG_USELIB
1172  /* This is really simpleminded and specialized - we are loading an
1173     a.out library that is given an ELF header. */
1174  static int load_elf_library(struct file *file)
1175  {
1176  	struct elf_phdr *elf_phdata;
1177  	struct elf_phdr *eppnt;
1178  	unsigned long elf_bss, bss, len;
1179  	int retval, error, i, j;
1180  	struct elfhdr elf_ex;
1181  
1182  	error = -ENOEXEC;
1183  	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1184  	if (retval < 0)
1185  		goto out;
1186  
1187  	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1188  		goto out;
1189  
1190  	/* First of all, some simple consistency checks */
1191  	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1192  	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1193  		goto out;
1194  	if (elf_check_fdpic(&elf_ex))
1195  		goto out;
1196  
1197  	/* Now read in all of the header information */
1198  
1199  	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1200  	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1201  
1202  	error = -ENOMEM;
1203  	elf_phdata = kmalloc(j, GFP_KERNEL);
1204  	if (!elf_phdata)
1205  		goto out;
1206  
1207  	eppnt = elf_phdata;
1208  	error = -ENOEXEC;
1209  	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1210  	if (retval < 0)
1211  		goto out_free_ph;
1212  
1213  	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1214  		if ((eppnt + i)->p_type == PT_LOAD)
1215  			j++;
1216  	if (j != 1)
1217  		goto out_free_ph;
1218  
1219  	while (eppnt->p_type != PT_LOAD)
1220  		eppnt++;
1221  
1222  	/* Now use mmap to map the library into memory. */
1223  	error = vm_mmap(file,
1224  			ELF_PAGESTART(eppnt->p_vaddr),
1225  			(eppnt->p_filesz +
1226  			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1227  			PROT_READ | PROT_WRITE | PROT_EXEC,
1228  			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1229  			(eppnt->p_offset -
1230  			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1231  	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1232  		goto out_free_ph;
1233  
1234  	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1235  	if (padzero(elf_bss)) {
1236  		error = -EFAULT;
1237  		goto out_free_ph;
1238  	}
1239  
1240  	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1241  	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1242  	if (bss > len) {
1243  		error = vm_brk(len, bss - len);
1244  		if (error)
1245  			goto out_free_ph;
1246  	}
1247  	error = 0;
1248  
1249  out_free_ph:
1250  	kfree(elf_phdata);
1251  out:
1252  	return error;
1253  }
1254  #endif /* #ifdef CONFIG_USELIB */
1255  
1256  #ifdef CONFIG_ELF_CORE
1257  /*
1258   * ELF core dumper
1259   *
1260   * Modelled on fs/exec.c:aout_core_dump()
1261   * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1262   */
1263  
1264  /*
1265   * The purpose of always_dump_vma() is to make sure that special kernel mappings
1266   * that are useful for post-mortem analysis are included in every core dump.
1267   * In that way we ensure that the core dump is fully interpretable later
1268   * without matching up the same kernel and hardware config to see what PC values
1269   * meant. These special mappings include the vDSO, vsyscall, and other
1270   * architecture-specific mappings.
1271   */
1272  static bool always_dump_vma(struct vm_area_struct *vma)
1273  {
1274  	/* Any vsyscall mappings? */
1275  	if (vma == get_gate_vma(vma->vm_mm))
1276  		return true;
1277  
1278  	/*
1279  	 * Assume that all vmas with a .name op should always be dumped.
1280  	 * If this changes, a new vm_ops field can easily be added.
1281  	 */
1282  	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1283  		return true;
1284  
1285  	/*
1286  	 * arch_vma_name() returns non-NULL for special architecture mappings,
1287  	 * such as vDSO sections.
1288  	 */
1289  	if (arch_vma_name(vma))
1290  		return true;
1291  
1292  	return false;
1293  }
1294  
1295  /*
1296   * Decide how much of a segment to dump: all of it, part of it, or none.
1297   */
1298  static unsigned long vma_dump_size(struct vm_area_struct *vma,
1299  				   unsigned long mm_flags)
1300  {
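/* Test one MMF_DUMP_* coredump filter bit in the mm_flags passed in. */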
1301  #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1302  
1303  	/* always dump the vdso and vsyscall sections */
1304  	if (always_dump_vma(vma))
1305  		goto whole;
1306  
1307  	if (vma->vm_flags & VM_DONTDUMP)
1308  		return 0;
1309  
1310  	/* support for DAX */
1311  	if (vma_is_dax(vma)) {
1312  		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1313  			goto whole;
1314  		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1315  			goto whole;
1316  		return 0;
1317  	}
1318  
1319  	/* Hugetlb memory check */
1320  	if (is_vm_hugetlb_page(vma)) {
1321  		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1322  			goto whole;
1323  		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1324  			goto whole;
1325  		return 0;
1326  	}
1327  
1328  	/* Do not dump I/O mapped devices or special mappings */
1329  	if (vma->vm_flags & VM_IO)
1330  		return 0;
1331  
1332  	/* By default, dump shared memory if mapped from an anonymous file. */
1333  	if (vma->vm_flags & VM_SHARED) {
1334  		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1335  		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1336  			goto whole;
1337  		return 0;
1338  	}
1339  
1340  	/* Dump segments that have been written to.  */
1341  	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1342  		goto whole;
1343  	if (vma->vm_file == NULL)
1344  		return 0;
1345  
1346  	if (FILTER(MAPPED_PRIVATE))
1347  		goto whole;
1348  
1349  	/*
1350  	 * If this looks like the beginning of a DSO or executable mapping,
1351  	 * check for an ELF header.  If we find one, dump the first page to
1352  	 * aid in determining what was mapped here.
1353  	 */
1354  	if (FILTER(ELF_HEADERS) &&
1355  	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1356  		u32 __user *header = (u32 __user *) vma->vm_start;
1357  		u32 word;
1358  		mm_segment_t fs = get_fs();
1359  		/*
1360  		 * Doing it this way gets the constant folded by GCC.
1361  		 */
1362  		union {
1363  			u32 cmp;
1364  			char elfmag[SELFMAG];
1365  		} magic;
1366  		BUILD_BUG_ON(SELFMAG != sizeof word);
1367  		magic.elfmag[EI_MAG0] = ELFMAG0;
1368  		magic.elfmag[EI_MAG1] = ELFMAG1;
1369  		magic.elfmag[EI_MAG2] = ELFMAG2;
1370  		magic.elfmag[EI_MAG3] = ELFMAG3;
1371  		/*
1372  		 * Switch to the user "segment" for get_user(),
1373  		 * then put back what elf_core_dump() had in place.
1374  		 */
1375  		set_fs(USER_DS);
1376  		if (unlikely(get_user(word, header)))
1377  			word = 0;
1378  		set_fs(fs);
1379  		if (word == magic.cmp)
1380  			return PAGE_SIZE;
1381  	}
1382  
1383  #undef	FILTER
1384  
1385  	return 0;
1386  
1387  whole:
1388  	return vma->vm_end - vma->vm_start;
1389  }
1390  
1391  /* An ELF note in memory */
1392  struct memelfnote
1393  {
1394  	const char *name;
1395  	int type;
1396  	unsigned int datasz;
1397  	void *data;
1398  };
1399  
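/* On-disk size of a note: the header plus the name and descriptor, each padded to 4 bytes. */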
1400  static int notesize(struct memelfnote *en)
1401  {
1402  	int sz;
1403  
1404  	sz = sizeof(struct elf_note);
1405  	sz += roundup(strlen(en->name) + 1, 4);
1406  	sz += roundup(en->datasz, 4);
1407  
1408  	return sz;
1409  }
1410  
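/* Emit one note: the header, then the name and the data, each aligned to a 4-byte boundary. */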
1411  static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1412  {
1413  	struct elf_note en;
1414  	en.n_namesz = strlen(men->name) + 1;
1415  	en.n_descsz = men->datasz;
1416  	en.n_type = men->type;
1417  
1418  	return dump_emit(cprm, &en, sizeof(en)) &&
1419  	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1420  	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1421  }
1422  
1423  static void fill_elf_header(struct elfhdr *elf, int segs,
1424  			    u16 machine, u32 flags)
1425  {
1426  	memset(elf, 0, sizeof(*elf));
1427  
1428  	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1429  	elf->e_ident[EI_CLASS] = ELF_CLASS;
1430  	elf->e_ident[EI_DATA] = ELF_DATA;
1431  	elf->e_ident[EI_VERSION] = EV_CURRENT;
1432  	elf->e_ident[EI_OSABI] = ELF_OSABI;
1433  
1434  	elf->e_type = ET_CORE;
1435  	elf->e_machine = machine;
1436  	elf->e_version = EV_CURRENT;
1437  	elf->e_phoff = sizeof(struct elfhdr);
1438  	elf->e_flags = flags;
1439  	elf->e_ehsize = sizeof(struct elfhdr);
1440  	elf->e_phentsize = sizeof(struct elf_phdr);
1441  	elf->e_phnum = segs;
1442  }
1443  
1444  static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1445  {
1446  	phdr->p_type = PT_NOTE;
1447  	phdr->p_offset = offset;
1448  	phdr->p_vaddr = 0;
1449  	phdr->p_paddr = 0;
1450  	phdr->p_filesz = sz;
1451  	phdr->p_memsz = 0;
1452  	phdr->p_flags = 0;
1453  	phdr->p_align = 0;
1454  }
1455  
1456  static void fill_note(struct memelfnote *note, const char *name, int type,
1457  		unsigned int sz, void *data)
1458  {
1459  	note->name = name;
1460  	note->type = type;
1461  	note->datasz = sz;
1462  	note->data = data;
1463  }
1464  
1465  /*
1466   * fill up all the fields in prstatus from the given task struct, except
1467   * registers which need to be filled up separately.
1468   */
1469  static void fill_prstatus(struct elf_prstatus *prstatus,
1470  		struct task_struct *p, long signr)
1471  {
1472  	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1473  	prstatus->pr_sigpend = p->pending.signal.sig[0];
1474  	prstatus->pr_sighold = p->blocked.sig[0];
1475  	rcu_read_lock();
1476  	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1477  	rcu_read_unlock();
1478  	prstatus->pr_pid = task_pid_vnr(p);
1479  	prstatus->pr_pgrp = task_pgrp_vnr(p);
1480  	prstatus->pr_sid = task_session_vnr(p);
1481  	if (thread_group_leader(p)) {
1482  		struct task_cputime cputime;
1483  
1484  		/*
1485  		 * This is the record for the group leader.  It shows the
1486  		 * group-wide total, not its individual thread total.
1487  		 */
1488  		thread_group_cputime(p, &cputime);
1489  		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1490  		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1491  	} else {
1492  		u64 utime, stime;
1493  
1494  		task_cputime(p, &utime, &stime);
1495  		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1496  		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1497  	}
1498  
1499  	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1500  	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1501  }
1502  
1503  static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1504  		       struct mm_struct *mm)
1505  {
1506  	const struct cred *cred;
1507  	unsigned int i, len;
1508  
1509  	/* first copy the parameters from user space */
1510  	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1511  
1512  	len = mm->arg_end - mm->arg_start;
1513  	if (len >= ELF_PRARGSZ)
1514  		len = ELF_PRARGSZ-1;
1515  	if (copy_from_user(&psinfo->pr_psargs,
1516  		           (const char __user *)mm->arg_start, len))
1517  		return -EFAULT;
1518  	for(i = 0; i < len; i++)
1519  		if (psinfo->pr_psargs[i] == 0)
1520  			psinfo->pr_psargs[i] = ' ';
1521  	psinfo->pr_psargs[len] = 0;
1522  
1523  	rcu_read_lock();
1524  	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1525  	rcu_read_unlock();
1526  	psinfo->pr_pid = task_pid_vnr(p);
1527  	psinfo->pr_pgrp = task_pgrp_vnr(p);
1528  	psinfo->pr_sid = task_session_vnr(p);
1529  
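	/* Map the lowest set task-state bit to an index into "RSDTZW"; 0 means running. */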
1530  	i = p->state ? ffz(~p->state) + 1 : 0;
1531  	psinfo->pr_state = i;
1532  	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1533  	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1534  	psinfo->pr_nice = task_nice(p);
1535  	psinfo->pr_flag = p->flags;
1536  	rcu_read_lock();
1537  	cred = __task_cred(p);
1538  	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1539  	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1540  	rcu_read_unlock();
1541  	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1542  
1543  	return 0;
1544  }
1545  
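/*
 * The auxv saved in mm->saved_auxv is a list of (id, value) pairs terminated
 * by AT_NULL; include everything up to and including that terminator.
 */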
1546  static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1547  {
1548  	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1549  	int i = 0;
1550  	do
1551  		i += 2;
1552  	while (auxv[i - 2] != AT_NULL);
1553  	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1554  }
1555  
1556  static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1557  		const kernel_siginfo_t *siginfo)
1558  {
1559  	mm_segment_t old_fs = get_fs();
1560  	set_fs(KERNEL_DS);
1561  	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1562  	set_fs(old_fs);
1563  	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1564  }
1565  
1566  #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1567  /*
1568   * Format of NT_FILE note:
1569   *
1570   * long count     -- how many files are mapped
1571   * long page_size -- units for file_ofs
1572   * array of [COUNT] elements of
1573   *   long start
1574   *   long end
1575   *   long file_ofs
1576   * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1577   */
1578  static int fill_files_note(struct memelfnote *note)
1579  {
1580  	struct mm_struct *mm = current->mm;
1581  	struct vm_area_struct *vma;
1582  	unsigned count, size, names_ofs, remaining, n;
1583  	user_long_t *data;
1584  	user_long_t *start_end_ofs;
1585  	char *name_base, *name_curpos;
1586  
1587  	/* *Estimated* file count and total data size needed */
1588  	count = mm->map_count;
1589  	if (count > UINT_MAX / 64)
1590  		return -EINVAL;
1591  	size = count * 64;
1592  
1593  	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1594   alloc:
1595  	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1596  		return -EINVAL;
1597  	size = round_up(size, PAGE_SIZE);
1598  	/*
1599  	 * "size" can be 0 here legitimately.
1600  	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1601  	 */
1602  	data = kvmalloc(size, GFP_KERNEL);
1603  	if (ZERO_OR_NULL_PTR(data))
1604  		return -ENOMEM;
1605  
1606  	start_end_ofs = data + 2;
1607  	name_base = name_curpos = ((char *)data) + names_ofs;
1608  	remaining = size - names_ofs;
1609  	count = 0;
1610  	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1611  		struct file *file;
1612  		const char *filename;
1613  
1614  		file = vma->vm_file;
1615  		if (!file)
1616  			continue;
1617  		filename = file_path(file, name_curpos, remaining);
1618  		if (IS_ERR(filename)) {
1619  			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1620  				kvfree(data);
1621  				size = size * 5 / 4;
1622  				goto alloc;
1623  			}
1624  			continue;
1625  		}
1626  
1627  		/* file_path() fills at the end, move name down */
1628  		/* n = strlen(filename) + 1: */
1629  		n = (name_curpos + remaining) - filename;
1630  		remaining = filename - name_curpos;
1631  		memmove(name_curpos, filename, n);
1632  		name_curpos += n;
1633  
1634  		*start_end_ofs++ = vma->vm_start;
1635  		*start_end_ofs++ = vma->vm_end;
1636  		*start_end_ofs++ = vma->vm_pgoff;
1637  		count++;
1638  	}
1639  
1640  	/* Now we know the exact count of files, so we can store it */
1641  	data[0] = count;
1642  	data[1] = PAGE_SIZE;
1643  	/*
1644  	 * The count is usually less than mm->map_count,
1645  	 * so we need to move the filenames down.
1646  	 */
1647  	n = mm->map_count - count;
1648  	if (n != 0) {
1649  		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1650  		memmove(name_base - shift_bytes, name_base,
1651  			name_curpos - name_base);
1652  		name_curpos -= shift_bytes;
1653  	}
1654  
1655  	size = name_curpos - (char *)data;
1656  	fill_note(note, "CORE", NT_FILE, size, data);
1657  	return 0;
1658  }
1659  
1660  #ifdef CORE_DUMP_USE_REGSET
1661  #include <linux/regset.h>
1662  
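/*
 * Per-thread core dump state: the NT_PRSTATUS note plus one note slot for
 * each additional regset the architecture exposes.
 */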
1663  struct elf_thread_core_info {
1664  	struct elf_thread_core_info *next;
1665  	struct task_struct *task;
1666  	struct elf_prstatus prstatus;
1667  	struct memelfnote notes[0];
1668  };
1669  
1670  struct elf_note_info {
1671  	struct elf_thread_core_info *thread;
1672  	struct memelfnote psinfo;
1673  	struct memelfnote signote;
1674  	struct memelfnote auxv;
1675  	struct memelfnote files;
1676  	user_siginfo_t csigdata;
1677  	size_t size;
1678  	int thread_notes;
1679  };
1680  
1681  /*
1682   * When a regset has a writeback hook, we call it on each thread before
1683   * dumping user memory.  On register window machines, this makes sure the
1684   * user memory backing the register data is up to date before we read it.
1685   */
1686  static void do_thread_regset_writeback(struct task_struct *task,
1687  				       const struct user_regset *regset)
1688  {
1689  	if (regset->writeback)
1690  		regset->writeback(task, regset, 1);
1691  }
1692  
1693  #ifndef PRSTATUS_SIZE
1694  #define PRSTATUS_SIZE(S, R) sizeof(S)
1695  #endif
1696  
1697  #ifndef SET_PR_FPVALID
1698  #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1699  #endif
1700  
1701  static int fill_thread_core_info(struct elf_thread_core_info *t,
1702  				 const struct user_regset_view *view,
1703  				 long signr, size_t *total)
1704  {
1705  	unsigned int i;
1706  	unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1707  
1708  	/*
1709  	 * NT_PRSTATUS is the one special case, because the regset data
1710  	 * goes into the pr_reg field inside the note contents, rather
1711  	 * than being the whole note contents.  We fill the rest in here.
1712  	 * We assume that regset 0 is NT_PRSTATUS.
1713  	 */
1714  	fill_prstatus(&t->prstatus, t->task, signr);
1715  	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1716  				    &t->prstatus.pr_reg, NULL);
1717  
1718  	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1719  		  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1720  	*total += notesize(&t->notes[0]);
1721  
1722  	do_thread_regset_writeback(t->task, &view->regsets[0]);
1723  
1724  	/*
1725  	 * Each other regset might generate a note too.  For each regset
1726  	 * that has no core_note_type or is inactive, we leave t->notes[i]
1727  	 * all zero and we'll know to skip writing it later.
1728  	 */
1729  	for (i = 1; i < view->n; ++i) {
1730  		const struct user_regset *regset = &view->regsets[i];
1731  		do_thread_regset_writeback(t->task, regset);
1732  		if (regset->core_note_type && regset->get &&
1733  		    (!regset->active || regset->active(t->task, regset) > 0)) {
1734  			int ret;
1735  			size_t size = regset_size(t->task, regset);
1736  			void *data = kmalloc(size, GFP_KERNEL);
1737  			if (unlikely(!data))
1738  				return 0;
1739  			ret = regset->get(t->task, regset,
1740  					  0, size, data, NULL);
1741  			if (unlikely(ret))
1742  				kfree(data);
1743  			else {
1744  				if (regset->core_note_type != NT_PRFPREG)
1745  					fill_note(&t->notes[i], "LINUX",
1746  						  regset->core_note_type,
1747  						  size, data);
1748  				else {
1749  					SET_PR_FPVALID(&t->prstatus,
1750  							1, regset0_size);
1751  					fill_note(&t->notes[i], "CORE",
1752  						  NT_PRFPREG, size, data);
1753  				}
1754  				*total += notesize(&t->notes[i]);
1755  			}
1756  		}
1757  	}
1758  
1759  	return 1;
1760  }
1761  
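/*
 * Collect everything the note segment needs: fill in the ELF header,
 * allocate one elf_thread_core_info per dumped thread and gather the
 * per-thread regset notes plus NT_PRPSINFO, NT_SIGINFO, NT_AUXV and
 * NT_FILE.  Returns 0 on failure, 1 on success.
 */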
1762  static int fill_note_info(struct elfhdr *elf, int phdrs,
1763  			  struct elf_note_info *info,
1764  			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1765  {
1766  	struct task_struct *dump_task = current;
1767  	const struct user_regset_view *view = task_user_regset_view(dump_task);
1768  	struct elf_thread_core_info *t;
1769  	struct elf_prpsinfo *psinfo;
1770  	struct core_thread *ct;
1771  	unsigned int i;
1772  
1773  	info->size = 0;
1774  	info->thread = NULL;
1775  
1776  	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1777  	if (psinfo == NULL) {
1778  		info->psinfo.data = NULL; /* So we don't free this wrongly */
1779  		return 0;
1780  	}
1781  
1782  	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1783  
1784  	/*
1785  	 * Figure out how many notes we're going to need for each thread.
1786  	 */
1787  	info->thread_notes = 0;
1788  	for (i = 0; i < view->n; ++i)
1789  		if (view->regsets[i].core_note_type != 0)
1790  			++info->thread_notes;
1791  
1792  	/*
1793  	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1794  	 * since it is our one special case.
1795  	 */
1796  	if (unlikely(info->thread_notes == 0) ||
1797  	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1798  		WARN_ON(1);
1799  		return 0;
1800  	}
1801  
1802  	/*
1803  	 * Initialize the ELF file header.
1804  	 */
1805  	fill_elf_header(elf, phdrs,
1806  			view->e_machine, view->e_flags);
1807  
1808  	/*
1809  	 * Allocate a structure for each thread.
1810  	 */
1811  	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1812  		t = kzalloc(offsetof(struct elf_thread_core_info,
1813  				     notes[info->thread_notes]),
1814  			    GFP_KERNEL);
1815  		if (unlikely(!t))
1816  			return 0;
1817  
1818  		t->task = ct->task;
1819  		if (ct->task == dump_task || !info->thread) {
1820  			t->next = info->thread;
1821  			info->thread = t;
1822  		} else {
1823  			/*
1824  			 * Make sure to keep the original task at
1825  			 * the head of the list.
1826  			 */
1827  			t->next = info->thread->next;
1828  			info->thread->next = t;
1829  		}
1830  	}
1831  
1832  	/*
1833  	 * Now fill in each thread's information.
1834  	 */
1835  	for (t = info->thread; t != NULL; t = t->next)
1836  		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1837  			return 0;
1838  
1839  	/*
1840  	 * Fill in the two process-wide notes.
1841  	 */
1842  	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1843  	info->size += notesize(&info->psinfo);
1844  
1845  	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1846  	info->size += notesize(&info->signote);
1847  
1848  	fill_auxv_note(&info->auxv, current->mm);
1849  	info->size += notesize(&info->auxv);
1850  
1851  	if (fill_files_note(&info->files) == 0)
1852  		info->size += notesize(&info->files);
1853  
1854  	return 1;
1855  }
1856  
1857  static size_t get_note_info_size(struct elf_note_info *info)
1858  {
1859  	return info->size;
1860  }
1861  
1862  /*
1863   * Write all the notes for each thread.  When writing the first thread, the
1864   * process-wide notes are interleaved after the first thread-specific note.
1865   */
1866  static int write_note_info(struct elf_note_info *info,
1867  			   struct coredump_params *cprm)
1868  {
1869  	bool first = true;
1870  	struct elf_thread_core_info *t = info->thread;
1871  
1872  	do {
1873  		int i;
1874  
1875  		if (!writenote(&t->notes[0], cprm))
1876  			return 0;
1877  
1878  		if (first && !writenote(&info->psinfo, cprm))
1879  			return 0;
1880  		if (first && !writenote(&info->signote, cprm))
1881  			return 0;
1882  		if (first && !writenote(&info->auxv, cprm))
1883  			return 0;
1884  		if (first && info->files.data &&
1885  				!writenote(&info->files, cprm))
1886  			return 0;
1887  
1888  		for (i = 1; i < info->thread_notes; ++i)
1889  			if (t->notes[i].data &&
1890  			    !writenote(&t->notes[i], cprm))
1891  				return 0;
1892  
1893  		first = false;
1894  		t = t->next;
1895  	} while (t);
1896  
1897  	return 1;
1898  }
1899  
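/*
 * Undo fill_note_info().  notes[0].data points at the prstatus embedded
 * in each elf_thread_core_info, so only the notes from index 1 on carry
 * separately allocated buffers.
 */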
1900  static void free_note_info(struct elf_note_info *info)
1901  {
1902  	struct elf_thread_core_info *threads = info->thread;
1903  	while (threads) {
1904  		unsigned int i;
1905  		struct elf_thread_core_info *t = threads;
1906  		threads = t->next;
1907  		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1908  		for (i = 1; i < info->thread_notes; ++i)
1909  			kfree(t->notes[i].data);
1910  		kfree(t);
1911  	}
1912  	kfree(info->psinfo.data);
1913  	kvfree(info->files.data);
1914  }
1915  
1916  #else
1917  
1918  /* Here is the structure in which status of each thread is captured. */
1919  struct elf_thread_status
1920  {
1921  	struct list_head list;
1922  	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1923  	elf_fpregset_t fpu;		/* NT_PRFPREG */
1924  	struct task_struct *thread;
1925  #ifdef ELF_CORE_COPY_XFPREGS
1926  	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1927  #endif
1928  	struct memelfnote notes[3];
1929  	int num_notes;
1930  };
1931  
1932  /*
1933   * In order to add the specific thread information for the ELF file format,
1934   * we need to keep a linked list of every thread's pr_status and then create
1935   * a single section for them in the final core file.
1936   */
1937  static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1938  {
1939  	int sz = 0;
1940  	struct task_struct *p = t->thread;
1941  	t->num_notes = 0;
1942  
1943  	fill_prstatus(&t->prstatus, p, signr);
1944  	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1945  
1946  	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1947  		  &(t->prstatus));
1948  	t->num_notes++;
1949  	sz += notesize(&t->notes[0]);
1950  
1951  	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1952  								&t->fpu))) {
1953  		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1954  			  &(t->fpu));
1955  		t->num_notes++;
1956  		sz += notesize(&t->notes[1]);
1957  	}
1958  
1959  #ifdef ELF_CORE_COPY_XFPREGS
1960  	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1961  		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1962  			  sizeof(t->xfpu), &t->xfpu);
1963  		t->num_notes++;
1964  		sz += notesize(&t->notes[2]);
1965  	}
1966  #endif
1967  	return sz;
1968  }
1969  
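/*
 * Non-regset counterpart of the bookkeeping above: fixed note buffers
 * for the dumping task plus a list of elf_thread_status entries, one
 * for every other thread.
 */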
1970  struct elf_note_info {
1971  	struct memelfnote *notes;
1972  	struct memelfnote *notes_files;
1973  	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1974  	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1975  	struct list_head thread_list;
1976  	elf_fpregset_t *fpu;
1977  #ifdef ELF_CORE_COPY_XFPREGS
1978  	elf_fpxregset_t *xfpu;
1979  #endif
1980  	user_siginfo_t csigdata;
1981  	int thread_status_size;
1982  	int numnote;
1983  };
1984  
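/*
 * Allocate the fixed note buffers.  A partial failure is fine: the dump
 * path falls through to free_note_info(), which frees whatever was
 * allocated.
 */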
1985  static int elf_note_info_init(struct elf_note_info *info)
1986  {
1987  	memset(info, 0, sizeof(*info));
1988  	INIT_LIST_HEAD(&info->thread_list);
1989  
1990  	/* Allocate space for ELF notes */
1991  	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
1992  	if (!info->notes)
1993  		return 0;
1994  	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1995  	if (!info->psinfo)
1996  		return 0;
1997  	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1998  	if (!info->prstatus)
1999  		return 0;
2000  	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2001  	if (!info->fpu)
2002  		return 0;
2003  #ifdef ELF_CORE_COPY_XFPREGS
2004  	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2005  	if (!info->xfpu)
2006  		return 0;
2007  #endif
2008  	return 1;
2009  }
2010  
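/*
 * Non-regset version: fill in the ELF header and collect NT_PRSTATUS,
 * NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE and the FPU notes for the
 * current task, plus a status note block for every other thread.
 * Returns 0 on failure, 1 on success.
 */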
2011  static int fill_note_info(struct elfhdr *elf, int phdrs,
2012  			  struct elf_note_info *info,
2013  			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2014  {
2015  	struct core_thread *ct;
2016  	struct elf_thread_status *ets;
2017  
2018  	if (!elf_note_info_init(info))
2019  		return 0;
2020  
2021  	for (ct = current->mm->core_state->dumper.next;
2022  					ct; ct = ct->next) {
2023  		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2024  		if (!ets)
2025  			return 0;
2026  
2027  		ets->thread = ct->task;
2028  		list_add(&ets->list, &info->thread_list);
2029  	}
2030  
2031  	list_for_each_entry(ets, &info->thread_list, list) {
2032  		int sz;
2033  
2034  		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2035  		info->thread_status_size += sz;
2036  	}
2037  	/* now collect the dump for the current task */
2038  	memset(info->prstatus, 0, sizeof(*info->prstatus));
2039  	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2040  	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2041  
2042  	/* Set up header */
2043  	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2044  
2045  	/*
2046  	 * Set up the notes in similar form to SVR4 core dumps made
2047  	 * with info from their /proc.
2048  	 */
2049  
2050  	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2051  		  sizeof(*info->prstatus), info->prstatus);
2052  	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2053  	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2054  		  sizeof(*info->psinfo), info->psinfo);
2055  
2056  	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2057  	fill_auxv_note(info->notes + 3, current->mm);
2058  	info->numnote = 4;
2059  
2060  	if (fill_files_note(info->notes + info->numnote) == 0) {
2061  		info->notes_files = info->notes + info->numnote;
2062  		info->numnote++;
2063  	}
2064  
2065  	/* Try to dump the FPU. */
2066  	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2067  							       info->fpu);
2068  	if (info->prstatus->pr_fpvalid)
2069  		fill_note(info->notes + info->numnote++,
2070  			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2071  #ifdef ELF_CORE_COPY_XFPREGS
2072  	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2073  		fill_note(info->notes + info->numnote++,
2074  			  "LINUX", ELF_CORE_XFPREG_TYPE,
2075  			  sizeof(*info->xfpu), info->xfpu);
2076  #endif
2077  
2078  	return 1;
2079  }
2080  
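/*
 * Total size of the note segment: every process-wide note plus the
 * thread status notes accumulated in fill_note_info().
 */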
2081  static size_t get_note_info_size(struct elf_note_info *info)
2082  {
2083  	int sz = 0;
2084  	int i;
2085  
2086  	for (i = 0; i < info->numnote; i++)
2087  		sz += notesize(info->notes + i);
2088  
2089  	sz += info->thread_status_size;
2090  
2091  	return sz;
2092  }
2093  
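/* Write the process-wide notes first, then each thread's status notes. */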
2094  static int write_note_info(struct elf_note_info *info,
2095  			   struct coredump_params *cprm)
2096  {
2097  	struct elf_thread_status *ets;
2098  	int i;
2099  
2100  	for (i = 0; i < info->numnote; i++)
2101  		if (!writenote(info->notes + i, cprm))
2102  			return 0;
2103  
2104  	/* write out the thread status notes section */
2105  	list_for_each_entry(ets, &info->thread_list, list) {
2106  		for (i = 0; i < ets->num_notes; i++)
2107  			if (!writenote(&ets->notes[i], cprm))
2108  				return 0;
2109  	}
2110  
2111  	return 1;
2112  }
2113  
2114  static void free_note_info(struct elf_note_info *info)
2115  {
2116  	while (!list_empty(&info->thread_list)) {
2117  		struct list_head *tmp = info->thread_list.next;
2118  		list_del(tmp);
2119  		kfree(list_entry(tmp, struct elf_thread_status, list));
2120  	}
2121  
2122  	/* Free data possibly allocated by fill_files_note(): */
2123  	if (info->notes_files)
2124  		kvfree(info->notes_files->data);
2125  
2126  	kfree(info->prstatus);
2127  	kfree(info->psinfo);
2128  	kfree(info->notes);
2129  	kfree(info->fpu);
2130  #ifdef ELF_CORE_COPY_XFPREGS
2131  	kfree(info->xfpu);
2132  #endif
2133  }
2134  
2135  #endif
2136  
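/*
 * First vma to dump: the mm's first mapping, or the gate vma if the
 * process has no mappings at all.
 */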
2137  static struct vm_area_struct *first_vma(struct task_struct *tsk,
2138  					struct vm_area_struct *gate_vma)
2139  {
2140  	struct vm_area_struct *ret = tsk->mm->mmap;
2141  
2142  	if (ret)
2143  		return ret;
2144  	return gate_vma;
2145  }
2146  /*
2147   * Helper function for iterating across a vma list.  It ensures that the caller
2148   * will visit `gate_vma' prior to terminating the search.
2149   */
2150  static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2151  					struct vm_area_struct *gate_vma)
2152  {
2153  	struct vm_area_struct *ret;
2154  
2155  	ret = this_vma->vm_next;
2156  	if (ret)
2157  		return ret;
2158  	if (this_vma == gate_vma)
2159  		return NULL;
2160  	return gate_vma;
2161  }
2162  
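/*
 * When the segment count exceeds PN_XNUM, e_phnum is set to PN_XNUM and
 * the real count is stored in the sh_info field of a single extra
 * section header instead.
 */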
2163  static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2164  			     elf_addr_t e_shoff, int segs)
2165  {
2166  	elf->e_shoff = e_shoff;
2167  	elf->e_shentsize = sizeof(*shdr4extnum);
2168  	elf->e_shnum = 1;
2169  	elf->e_shstrndx = SHN_UNDEF;
2170  
2171  	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2172  
2173  	shdr4extnum->sh_type = SHT_NULL;
2174  	shdr4extnum->sh_size = elf->e_shnum;
2175  	shdr4extnum->sh_link = elf->e_shstrndx;
2176  	shdr4extnum->sh_info = segs;
2177  }
2178  
2179  /*
2180   * Actual dumper
2181   *
2182   * This is a two-pass process; first we find the offsets of the bits,
2183   * and then they are actually written out.  If we run out of core limit
2184   * and then they are actually written out.  If we run over the core file
2185   * size limit we just truncate.
2186  static int elf_core_dump(struct coredump_params *cprm)
2187  {
2188  	int has_dumped = 0;
2189  	mm_segment_t fs;
2190  	int segs, i;
2191  	size_t vma_data_size = 0;
2192  	struct vm_area_struct *vma, *gate_vma;
2193  	struct elfhdr elf;
2194  	loff_t offset = 0, dataoff;
2195  	struct elf_note_info info = { };
2196  	struct elf_phdr *phdr4note = NULL;
2197  	struct elf_shdr *shdr4extnum = NULL;
2198  	Elf_Half e_phnum;
2199  	elf_addr_t e_shoff;
2200  	elf_addr_t *vma_filesz = NULL;
2201  
2202  	/*
2203  	 * We no longer stop all VM operations.
2204  	 *
2205  	 * This is because those processes that could possibly change map_count
2206  	 * or the mmap / vma pages are now blocked in do_exit on current
2207  	 * finishing this core dump.
2208  	 *
2209  	 * Only ptrace can touch these memory addresses, but it doesn't change
2210  	 * the map_count or the pages allocated. So no possibility of crashing
2211  	 * exists while dumping the mm->vm_next areas to the core file.
2212  	 */
2213  
2214  	/*
2215  	 * The number of segs is recorded in the ELF header as a 16-bit value.
2216  	 * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
2217  	 */
2218  	segs = current->mm->map_count;
2219  	segs += elf_core_extra_phdrs();
2220  
2221  	gate_vma = get_gate_vma(current->mm);
2222  	if (gate_vma != NULL)
2223  		segs++;
2224  
2225  	/* for notes section */
2226  	segs++;
2227  
2228  	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2229  	 * this, the kernel supports extended numbering. Have a look at
2230  	 * include/linux/elf.h for further information. */
2231  	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2232  
2233  	/*
2234  	 * Collect all the non-memory information about the process for the
2235  	 * notes.  This also sets up the file header.
2236  	 */
2237  	if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2238  		goto cleanup;
2239  
2240  	has_dumped = 1;
2241  
2242  	fs = get_fs();
2243  	set_fs(KERNEL_DS);
2244  
2245  	offset += sizeof(elf);				/* Elf header */
2246  	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2247  
2248  	/* Write notes phdr entry */
2249  	{
2250  		size_t sz = get_note_info_size(&info);
2251  
2252  		sz += elf_coredump_extra_notes_size();
2253  
2254  		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2255  		if (!phdr4note)
2256  			goto end_coredump;
2257  
2258  		fill_elf_note_phdr(phdr4note, sz, offset);
2259  		offset += sz;
2260  	}
2261  
2262  	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2263  
2264  	/*
2265  	 * A process with zero vmas will get ZERO_SIZE_PTR here.
2266  	 * Let the coredump continue for the register state at least.
2267  	 */
2268  	vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2269  			      GFP_KERNEL);
2270  	if (!vma_filesz)
2271  		goto end_coredump;
2272  
2273  	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2274  			vma = next_vma(vma, gate_vma)) {
2275  		unsigned long dump_size;
2276  
2277  		dump_size = vma_dump_size(vma, cprm->mm_flags);
2278  		vma_filesz[i++] = dump_size;
2279  		vma_data_size += dump_size;
2280  	}
2281  
2282  	offset += vma_data_size;
2283  	offset += elf_core_extra_data_size();
2284  	e_shoff = offset;
2285  
2286  	if (e_phnum == PN_XNUM) {
2287  		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2288  		if (!shdr4extnum)
2289  			goto end_coredump;
2290  		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2291  	}
2292  
2293  	offset = dataoff;
2294  
2295  	if (!dump_emit(cprm, &elf, sizeof(elf)))
2296  		goto end_coredump;
2297  
2298  	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2299  		goto end_coredump;
2300  
2301  	/* Write program headers for segments dump */
2302  	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2303  			vma = next_vma(vma, gate_vma)) {
2304  		struct elf_phdr phdr;
2305  
2306  		phdr.p_type = PT_LOAD;
2307  		phdr.p_offset = offset;
2308  		phdr.p_vaddr = vma->vm_start;
2309  		phdr.p_paddr = 0;
2310  		phdr.p_filesz = vma_filesz[i++];
2311  		phdr.p_memsz = vma->vm_end - vma->vm_start;
2312  		offset += phdr.p_filesz;
2313  		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2314  		if (vma->vm_flags & VM_WRITE)
2315  			phdr.p_flags |= PF_W;
2316  		if (vma->vm_flags & VM_EXEC)
2317  			phdr.p_flags |= PF_X;
2318  		phdr.p_align = ELF_EXEC_PAGESIZE;
2319  
2320  		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2321  			goto end_coredump;
2322  	}
2323  
2324  	if (!elf_core_write_extra_phdrs(cprm, offset))
2325  		goto end_coredump;
2326  
2327  	/* write out the notes section */
2328  	if (!write_note_info(&info, cprm))
2329  		goto end_coredump;
2330  
2331  	if (elf_coredump_extra_notes_write(cprm))
2332  		goto end_coredump;
2333  
2334  	/* Align to page */
2335  	if (!dump_skip(cprm, dataoff - cprm->pos))
2336  		goto end_coredump;
2337  
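	/*
	 * Second pass: dump the contents of each vma, up to the per-vma
	 * size computed above.  Pages that cannot be fetched are skipped.
	 */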
2338  	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2339  			vma = next_vma(vma, gate_vma)) {
2340  		unsigned long addr;
2341  		unsigned long end;
2342  
2343  		end = vma->vm_start + vma_filesz[i++];
2344  
2345  		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2346  			struct page *page;
2347  			int stop;
2348  
2349  			page = get_dump_page(addr);
2350  			if (page) {
2351  				void *kaddr = kmap(page);
2352  				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2353  				kunmap(page);
2354  				put_page(page);
2355  			} else
2356  				stop = !dump_skip(cprm, PAGE_SIZE);
2357  			if (stop)
2358  				goto end_coredump;
2359  		}
2360  	}
2361  	dump_truncate(cprm);
2362  
2363  	if (!elf_core_write_extra_data(cprm))
2364  		goto end_coredump;
2365  
2366  	if (e_phnum == PN_XNUM) {
2367  		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2368  			goto end_coredump;
2369  	}
2370  
2371  end_coredump:
2372  	set_fs(fs);
2373  
2374  cleanup:
2375  	free_note_info(&info);
2376  	kfree(shdr4extnum);
2377  	kvfree(vma_filesz);
2378  	kfree(phdr4note);
2379  	return has_dumped;
2380  }
2381  
2382  #endif		/* CONFIG_ELF_CORE */
2383  
2384  static int __init init_elf_binfmt(void)
2385  {
2386  	register_binfmt(&elf_format);
2387  	return 0;
2388  }
2389  
2390  static void __exit exit_elf_binfmt(void)
2391  {
2392  	/* Remove the ELF loader. */
2393  	unregister_binfmt(&elf_format);
2394  }
2395  
2396  core_initcall(init_elf_binfmt);
2397  module_exit(exit_elf_binfmt);
2398  MODULE_LICENSE("GPL");
2399