xref: /openbmc/linux/fs/binfmt_elf.c (revision 877013bc)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * linux/fs/binfmt_elf.c
4   *
5   * These are the functions used to load ELF format executables as used
6   * on SVr4 machines.  Information on the format may be found in the book
7   * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8   * Tools".
9   *
10   * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11   */
12  
13  #include <linux/module.h>
14  #include <linux/kernel.h>
15  #include <linux/fs.h>
16  #include <linux/log2.h>
17  #include <linux/mm.h>
18  #include <linux/mman.h>
19  #include <linux/errno.h>
20  #include <linux/signal.h>
21  #include <linux/binfmts.h>
22  #include <linux/string.h>
23  #include <linux/file.h>
24  #include <linux/slab.h>
25  #include <linux/personality.h>
26  #include <linux/elfcore.h>
27  #include <linux/init.h>
28  #include <linux/highuid.h>
29  #include <linux/compiler.h>
30  #include <linux/highmem.h>
31  #include <linux/hugetlb.h>
32  #include <linux/pagemap.h>
33  #include <linux/vmalloc.h>
34  #include <linux/security.h>
35  #include <linux/random.h>
36  #include <linux/elf.h>
37  #include <linux/elf-randomize.h>
38  #include <linux/utsname.h>
39  #include <linux/coredump.h>
40  #include <linux/sched.h>
41  #include <linux/sched/coredump.h>
42  #include <linux/sched/task_stack.h>
43  #include <linux/sched/cputime.h>
44  #include <linux/sizes.h>
45  #include <linux/types.h>
46  #include <linux/cred.h>
47  #include <linux/dax.h>
48  #include <linux/uaccess.h>
49  #include <asm/param.h>
50  #include <asm/page.h>
51  
52  #ifndef ELF_COMPAT
53  #define ELF_COMPAT 0
54  #endif
55  
56  #ifndef user_long_t
57  #define user_long_t long
58  #endif
59  #ifndef user_siginfo_t
60  #define user_siginfo_t siginfo_t
61  #endif
62  
63  /* That's for binfmt_elf_fdpic to deal with */
64  #ifndef elf_check_fdpic
65  #define elf_check_fdpic(ex) false
66  #endif
67  
68  static int load_elf_binary(struct linux_binprm *bprm);
69  
70  #ifdef CONFIG_USELIB
71  static int load_elf_library(struct file *);
72  #else
73  #define load_elf_library NULL
74  #endif
75  
76  /*
77   * If we don't support core dumping, then supply a NULL so we
78   * don't even try.
79   */
80  #ifdef CONFIG_ELF_CORE
81  static int elf_core_dump(struct coredump_params *cprm);
82  #else
83  #define elf_core_dump	NULL
84  #endif
85  
86  #if ELF_EXEC_PAGESIZE > PAGE_SIZE
87  #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
88  #else
89  #define ELF_MIN_ALIGN	PAGE_SIZE
90  #endif
91  
92  #ifndef ELF_CORE_EFLAGS
93  #define ELF_CORE_EFLAGS	0
94  #endif
95  
96  #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
97  #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
98  #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
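/*
 * For example, with ELF_MIN_ALIGN == 0x1000 and a virtual address of
 * 0x0804a12c (illustrative values):
 *
 *	ELF_PAGESTART(0x0804a12c)  == 0x0804a000
 *	ELF_PAGEOFFSET(0x0804a12c) == 0x0000012c
 *	ELF_PAGEALIGN(0x0804a12c)  == 0x0804b000
 */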
99  
100  static struct linux_binfmt elf_format = {
101  	.module		= THIS_MODULE,
102  	.load_binary	= load_elf_binary,
103  	.load_shlib	= load_elf_library,
104  	.core_dump	= elf_core_dump,
105  	.min_coredump	= ELF_EXEC_PAGESIZE,
106  };
107  
108  #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
109  
110  static int set_brk(unsigned long start, unsigned long end, int prot)
111  {
112  	start = ELF_PAGEALIGN(start);
113  	end = ELF_PAGEALIGN(end);
114  	if (end > start) {
115  		/*
116  		 * Map the last of the bss segment.
117  		 * If the header is requesting these pages to be
118  		 * executable, honour that (ppc32 needs this).
119  		 */
120  		int error = vm_brk_flags(start, end - start,
121  				prot & PROT_EXEC ? VM_EXEC : 0);
122  		if (error)
123  			return error;
124  	}
125  	current->mm->start_brk = current->mm->brk = end;
126  	return 0;
127  }
128  
129  /* We need to explicitly zero any fractional pages
130     after the data section (i.e. bss).  They would
131     otherwise contain junk from the file that should
132     not be in memory.
133   */
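/*
 * E.g. with elf_bss == 0x0804a123 and ELF_MIN_ALIGN == 0x1000
 * (illustrative values), the 0xedd bytes from 0x0804a123 up to the
 * 0x0804b000 page boundary are cleared.
 */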
134  static int padzero(unsigned long elf_bss)
135  {
136  	unsigned long nbyte;
137  
138  	nbyte = ELF_PAGEOFFSET(elf_bss);
139  	if (nbyte) {
140  		nbyte = ELF_MIN_ALIGN - nbyte;
141  		if (clear_user((void __user *) elf_bss, nbyte))
142  			return -EFAULT;
143  	}
144  	return 0;
145  }
146  
147  /* Let's use some macros to make this stack manipulation a little clearer */
148  #ifdef CONFIG_STACK_GROWSUP
149  #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
150  #define STACK_ROUND(sp, items) \
151  	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
152  #define STACK_ALLOC(sp, len) ({ \
153  	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
154  	old_sp; })
155  #else
156  #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
157  #define STACK_ROUND(sp, items) \
158  	(((unsigned long) (sp - items)) &~ 15UL)
159  #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
160  #endif
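/*
 * Illustrative grows-down example: with sp == 0x7ffffffe0000,
 * STACK_ALLOC(sp, 16) lowers sp to 0x7ffffffdfff0 and returns it, so
 * the 16 allocated bytes occupy [0x7ffffffdfff0, 0x7ffffffe0000).
 * STACK_ROUND() then masks the final pointer down to a 16-byte
 * boundary, as most ABIs require at process entry.
 */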
161  
162  #ifndef ELF_BASE_PLATFORM
163  /*
164   * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
165   * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
166   * will be copied to the user stack in the same manner as AT_PLATFORM.
167   */
168  #define ELF_BASE_PLATFORM NULL
169  #endif
170  
171  static int
172  create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
173  		unsigned long load_addr, unsigned long interp_load_addr,
174  		unsigned long e_entry)
175  {
176  	struct mm_struct *mm = current->mm;
177  	unsigned long p = bprm->p;
178  	int argc = bprm->argc;
179  	int envc = bprm->envc;
180  	elf_addr_t __user *sp;
181  	elf_addr_t __user *u_platform;
182  	elf_addr_t __user *u_base_platform;
183  	elf_addr_t __user *u_rand_bytes;
184  	const char *k_platform = ELF_PLATFORM;
185  	const char *k_base_platform = ELF_BASE_PLATFORM;
186  	unsigned char k_rand_bytes[16];
187  	int items;
188  	elf_addr_t *elf_info;
189  	elf_addr_t flags = 0;
190  	int ei_index;
191  	const struct cred *cred = current_cred();
192  	struct vm_area_struct *vma;
193  
194  	/*
195  	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
196  	 * evictions by the processes running on the same package. One
197  	 * thing we can do is to shuffle the initial stack for them.
198  	 */
199  
200  	p = arch_align_stack(p);
201  
202  	/*
203  	 * If this architecture has a platform capability string, copy it
204  	 * to userspace.  In some cases (Sparc), this info is impossible
205  	 * for userspace to get any other way, in others (i386) it is
206  	 * merely difficult.
207  	 */
208  	u_platform = NULL;
209  	if (k_platform) {
210  		size_t len = strlen(k_platform) + 1;
211  
212  		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
213  		if (copy_to_user(u_platform, k_platform, len))
214  			return -EFAULT;
215  	}
216  
217  	/*
218  	 * If this architecture has a "base" platform capability
219  	 * string, copy it to userspace.
220  	 */
221  	u_base_platform = NULL;
222  	if (k_base_platform) {
223  		size_t len = strlen(k_base_platform) + 1;
224  
225  		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
226  		if (copy_to_user(u_base_platform, k_base_platform, len))
227  			return -EFAULT;
228  	}
229  
230  	/*
231  	 * Generate 16 random bytes for userspace PRNG seeding.
232  	 */
233  	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
234  	u_rand_bytes = (elf_addr_t __user *)
235  		       STACK_ALLOC(p, sizeof(k_rand_bytes));
236  	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
237  		return -EFAULT;
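	/*
	 * Userspace view (illustrative sketch): the pointer published as
	 * AT_RANDOM below can be read back with glibc's getauxval(), e.g.
	 *
	 *	#include <sys/auxv.h>
	 *	const unsigned char *seed =
	 *		(const unsigned char *)getauxval(AT_RANDOM);
	 *
	 * libc typically uses these 16 bytes to seed the stack-protector
	 * canary and pointer mangling.
	 */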
238  
239  	/* Create the ELF interpreter info */
240  	elf_info = (elf_addr_t *)mm->saved_auxv;
241  	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
242  #define NEW_AUX_ENT(id, val) \
243  	do { \
244  		*elf_info++ = id; \
245  		*elf_info++ = val; \
246  	} while (0)
247  
248  #ifdef ARCH_DLINFO
249  	/*
250  	 * ARCH_DLINFO must come first so PPC can do its special alignment of
251  	 * AUXV.
252  	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
253  	 * ARCH_DLINFO changes
254  	 */
255  	ARCH_DLINFO;
256  #endif
257  	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
258  	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
259  	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
260  	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
261  	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
262  	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
263  	NEW_AUX_ENT(AT_BASE, interp_load_addr);
264  	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
265  		flags |= AT_FLAGS_PRESERVE_ARGV0;
266  	NEW_AUX_ENT(AT_FLAGS, flags);
267  	NEW_AUX_ENT(AT_ENTRY, e_entry);
268  	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
269  	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
270  	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
271  	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
272  	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
273  	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
274  #ifdef ELF_HWCAP2
275  	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
276  #endif
277  	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
278  	if (k_platform) {
279  		NEW_AUX_ENT(AT_PLATFORM,
280  			    (elf_addr_t)(unsigned long)u_platform);
281  	}
282  	if (k_base_platform) {
283  		NEW_AUX_ENT(AT_BASE_PLATFORM,
284  			    (elf_addr_t)(unsigned long)u_base_platform);
285  	}
286  	if (bprm->have_execfd) {
287  		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
288  	}
289  #undef NEW_AUX_ENT
290  	/* AT_NULL is zero; clear the rest too */
291  	memset(elf_info, 0, (char *)mm->saved_auxv +
292  			sizeof(mm->saved_auxv) - (char *)elf_info);
293  
294  	/* And advance past the AT_NULL entry.  */
295  	elf_info += 2;
296  
297  	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
298  	sp = STACK_ADD(p, ei_index);
299  
300  	items = (argc + 1) + (envc + 1) + 1;
301  	bprm->p = STACK_ROUND(sp, items);
302  
303  	/* Point sp at the lowest address on the stack */
304  #ifdef CONFIG_STACK_GROWSUP
305  	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
306  	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
307  #else
308  	sp = (elf_addr_t __user *)bprm->p;
309  #endif
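	/*
	 * From here on, the area from bprm->p upward is filled in as
	 * (illustrative, grows-down case):
	 *
	 *	argc
	 *	argv[0..argc-1] pointers, NULL
	 *	envp[0..envc-1] pointers, NULL
	 *	auxv id/value pairs, terminated by AT_NULL
	 *	(the strings and AT_RANDOM bytes copied above sit higher up)
	 */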
310  
311  
312  	/*
313  	 * Grow the stack manually; some architectures have a limit on how
314  	 * far ahead a user-space access may be in order to grow the stack.
315  	 */
316  	if (mmap_read_lock_killable(mm))
317  		return -EINTR;
318  	vma = find_extend_vma(mm, bprm->p);
319  	mmap_read_unlock(mm);
320  	if (!vma)
321  		return -EFAULT;
322  
323  	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
324  	if (put_user(argc, sp++))
325  		return -EFAULT;
326  
327  	/* Populate list of argv pointers back to argv strings. */
328  	p = mm->arg_end = mm->arg_start;
329  	while (argc-- > 0) {
330  		size_t len;
331  		if (put_user((elf_addr_t)p, sp++))
332  			return -EFAULT;
333  		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
334  		if (!len || len > MAX_ARG_STRLEN)
335  			return -EINVAL;
336  		p += len;
337  	}
338  	if (put_user(0, sp++))
339  		return -EFAULT;
340  	mm->arg_end = p;
341  
342  	/* Populate list of envp pointers back to envp strings. */
343  	mm->env_end = mm->env_start = p;
344  	while (envc-- > 0) {
345  		size_t len;
346  		if (put_user((elf_addr_t)p, sp++))
347  			return -EFAULT;
348  		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
349  		if (!len || len > MAX_ARG_STRLEN)
350  			return -EINVAL;
351  		p += len;
352  	}
353  	if (put_user(0, sp++))
354  		return -EFAULT;
355  	mm->env_end = p;
356  
357  	/* Put the elf_info on the stack in the right place.  */
358  	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
359  		return -EFAULT;
360  	return 0;
361  }
362  
363  static unsigned long elf_map(struct file *filep, unsigned long addr,
364  		const struct elf_phdr *eppnt, int prot, int type,
365  		unsigned long total_size)
366  {
367  	unsigned long map_addr;
368  	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
369  	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
370  	addr = ELF_PAGESTART(addr);
371  	size = ELF_PAGEALIGN(size);
372  
373  	/* mmap() will return -EINVAL if given a zero size, but a
374  	 * segment with zero filesize is perfectly valid */
375  	if (!size)
376  		return addr;
377  
378  	/*
379  	 * total_size is the size of the ELF (interpreter) image.
380  	 * The _first_ mmap needs to know the full size, otherwise
381  	 * randomization might put this image into an overlapping
382  	 * position with the ELF binary image (since size < total_size).
383  	 * So we first map the 'big' image and then unmap the remainder
384  	 * at the end (the unmap is needed for ELF images with holes).
385  	 */
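	/*
	 * E.g. an interpreter whose segments span 0x30000 bytes but whose
	 * first segment maps only 0x2000 from the file: we vm_mmap() the
	 * full 0x30000 at map_addr, then vm_munmap() the trailing 0x2e000
	 * so the later elf_map() calls can place the remaining segments
	 * there. (Illustrative numbers.)
	 */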
386  	if (total_size) {
387  		total_size = ELF_PAGEALIGN(total_size);
388  		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
389  		if (!BAD_ADDR(map_addr))
390  			vm_munmap(map_addr+size, total_size-size);
391  	} else
392  		map_addr = vm_mmap(filep, addr, size, prot, type, off);
393  
394  	if ((type & MAP_FIXED_NOREPLACE) &&
395  	    PTR_ERR((void *)map_addr) == -EEXIST)
396  		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
397  			task_pid_nr(current), current->comm, (void *)addr);
398  
399  	return map_addr;
400  }
401  
402  static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
403  {
404  	int i, first_idx = -1, last_idx = -1;
405  
406  	for (i = 0; i < nr; i++) {
407  		if (cmds[i].p_type == PT_LOAD) {
408  			last_idx = i;
409  			if (first_idx == -1)
410  				first_idx = i;
411  		}
412  	}
413  	if (first_idx == -1)
414  		return 0;
415  
416  	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
417  				ELF_PAGESTART(cmds[first_idx].p_vaddr);
418  }
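/*
 * E.g. two PT_LOADs at p_vaddr 0x400000 (text) and 0x600000 with
 * p_memsz 0x5000 (data+bss) yield 0x605000 - 0x400000 = 0x205000,
 * the span a single up-front mapping must reserve. (Illustrative.)
 */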
419  
420  static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
421  {
422  	ssize_t rv;
423  
424  	rv = kernel_read(file, buf, len, &pos);
425  	if (unlikely(rv != len)) {
426  		return (rv < 0) ? rv : -EIO;
427  	}
428  	return 0;
429  }
430  
431  static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
432  {
433  	unsigned long alignment = 0;
434  	int i;
435  
436  	for (i = 0; i < nr; i++) {
437  		if (cmds[i].p_type == PT_LOAD) {
438  			unsigned long p_align = cmds[i].p_align;
439  
440  			/* skip non-power of two alignments as invalid */
441  			if (!is_power_of_2(p_align))
442  				continue;
443  			alignment = max(alignment, p_align);
444  		}
445  	}
446  
447  	/* ensure we align to at least one page */
448  	return ELF_PAGEALIGN(alignment);
449  }
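/*
 * E.g. p_align values of 0x1000 and 0x200000 (2 MiB, as produced by
 * ld -z max-page-size=0x200000) give 0x200000, while a corrupt p_align
 * of 0x180000 is skipped as a non-power-of-two. (Illustrative values.)
 */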
450  
451  /**
452   * load_elf_phdrs() - load ELF program headers
453   * @elf_ex:   ELF header of the binary whose program headers should be loaded
454   * @elf_file: the opened ELF binary file
455   *
456   * Loads ELF program headers from the binary file elf_file, which has the ELF
457   * header pointed to by elf_ex, into a newly allocated array. The caller is
458   * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
459   */
460  static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
461  				       struct file *elf_file)
462  {
463  	struct elf_phdr *elf_phdata = NULL;
464  	int retval, err = -1;
465  	unsigned int size;
466  
467  	/*
468  	 * If the size of this structure has changed, then punt, since
469  	 * we will be doing the wrong thing.
470  	 */
471  	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
472  		goto out;
473  
474  	/* Sanity check the number of program headers... */
475  	/* ...and their total size. */
476  	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
477  	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
478  		goto out;
479  
480  	elf_phdata = kmalloc(size, GFP_KERNEL);
481  	if (!elf_phdata)
482  		goto out;
483  
484  	/* Read in the program headers */
485  	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
486  	if (retval < 0) {
487  		err = retval;
488  		goto out;
489  	}
490  
491  	/* Success! */
492  	err = 0;
493  out:
494  	if (err) {
495  		kfree(elf_phdata);
496  		elf_phdata = NULL;
497  	}
498  	return elf_phdata;
499  }
500  
501  #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
502  
503  /**
504   * struct arch_elf_state - arch-specific ELF loading state
505   *
506   * This structure is used to preserve architecture specific data during
507   * the loading of an ELF file, throughout the checking of architecture
508   * specific ELF headers & through to the point where the ELF load is
509   * known to be proceeding (ie. SET_PERSONALITY).
510   *
511   * This implementation is a dummy for architectures which require no
512   * specific state.
513   */
514  struct arch_elf_state {
515  };
516  
517  #define INIT_ARCH_ELF_STATE {}
518  
519  /**
520   * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
521   * @ehdr:	The main ELF header
522   * @phdr:	The program header to check
523   * @elf:	The open ELF file
524   * @is_interp:	True if the phdr is from the interpreter of the ELF being
525   *		loaded, else false.
526   * @state:	Architecture-specific state preserved throughout the process
527   *		of loading the ELF.
528   *
529   * Inspects the program header phdr to validate its correctness and/or
530   * suitability for the system. Called once per ELF program header in the
531   * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
532   * interpreter.
533   *
534   * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
535   *         with that return code.
536   */
537  static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
538  				   struct elf_phdr *phdr,
539  				   struct file *elf, bool is_interp,
540  				   struct arch_elf_state *state)
541  {
542  	/* Dummy implementation, always proceed */
543  	return 0;
544  }
545  
546  /**
547   * arch_check_elf() - check an ELF executable
548   * @ehdr:	The main ELF header
549   * @has_interp:	True if the ELF has an interpreter, else false.
550   * @interp_ehdr: The interpreter's ELF header
551   * @state:	Architecture-specific state preserved throughout the process
552   *		of loading the ELF.
553   *
554   * Provides a final opportunity for architecture code to reject the loading
555   * of the ELF & cause an exec syscall to return an error. This is called after
556   * all program headers to be checked by arch_elf_pt_proc have been.
557   *
558   * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
559   *         with that return code.
560   */
561  static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
562  				 struct elfhdr *interp_ehdr,
563  				 struct arch_elf_state *state)
564  {
565  	/* Dummy implementation, always proceed */
566  	return 0;
567  }
568  
569  #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
570  
571  static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
572  			    bool has_interp, bool is_interp)
573  {
574  	int prot = 0;
575  
576  	if (p_flags & PF_R)
577  		prot |= PROT_READ;
578  	if (p_flags & PF_W)
579  		prot |= PROT_WRITE;
580  	if (p_flags & PF_X)
581  		prot |= PROT_EXEC;
582  
583  	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
584  }
585  
586  /* This is much more generalized than the library routine read function,
587     so we keep this separate.  Technically the library read function
588     is only provided so that we can read a.out libraries that have
589     an ELF header. */
590  
591  static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
592  		struct file *interpreter,
593  		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
594  		struct arch_elf_state *arch_state)
595  {
596  	struct elf_phdr *eppnt;
597  	unsigned long load_addr = 0;
598  	int load_addr_set = 0;
599  	unsigned long last_bss = 0, elf_bss = 0;
600  	int bss_prot = 0;
601  	unsigned long error = ~0UL;
602  	unsigned long total_size;
603  	int i;
604  
605  	/* First of all, some simple consistency checks */
606  	if (interp_elf_ex->e_type != ET_EXEC &&
607  	    interp_elf_ex->e_type != ET_DYN)
608  		goto out;
609  	if (!elf_check_arch(interp_elf_ex) ||
610  	    elf_check_fdpic(interp_elf_ex))
611  		goto out;
612  	if (!interpreter->f_op->mmap)
613  		goto out;
614  
615  	total_size = total_mapping_size(interp_elf_phdata,
616  					interp_elf_ex->e_phnum);
617  	if (!total_size) {
618  		error = -EINVAL;
619  		goto out;
620  	}
621  
622  	eppnt = interp_elf_phdata;
623  	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
624  		if (eppnt->p_type == PT_LOAD) {
625  			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
626  			int elf_prot = make_prot(eppnt->p_flags, arch_state,
627  						 true, true);
628  			unsigned long vaddr = 0;
629  			unsigned long k, map_addr;
630  
631  			vaddr = eppnt->p_vaddr;
632  			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
633  				elf_type |= MAP_FIXED_NOREPLACE;
634  			else if (no_base && interp_elf_ex->e_type == ET_DYN)
635  				load_addr = -vaddr;
636  
637  			map_addr = elf_map(interpreter, load_addr + vaddr,
638  					eppnt, elf_prot, elf_type, total_size);
639  			total_size = 0;
640  			error = map_addr;
641  			if (BAD_ADDR(map_addr))
642  				goto out;
643  
644  			if (!load_addr_set &&
645  			    interp_elf_ex->e_type == ET_DYN) {
646  				load_addr = map_addr - ELF_PAGESTART(vaddr);
647  				load_addr_set = 1;
648  			}
649  
650  			/*
651  			 * Check to see if the section's size will overflow the
652  			 * allowed task size. Note that p_filesz must always be
653  			 * <= p_memsz so it's only necessary to check p_memsz.
654  			 */
655  			k = load_addr + eppnt->p_vaddr;
656  			if (BAD_ADDR(k) ||
657  			    eppnt->p_filesz > eppnt->p_memsz ||
658  			    eppnt->p_memsz > TASK_SIZE ||
659  			    TASK_SIZE - eppnt->p_memsz < k) {
660  				error = -ENOMEM;
661  				goto out;
662  			}
663  
664  			/*
665  			 * Find the end of the file mapping for this phdr, and
666  			 * keep track of the largest address we see for this.
667  			 */
668  			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
669  			if (k > elf_bss)
670  				elf_bss = k;
671  
672  			/*
673  			 * Do the same thing for the memory mapping - between
674  			 * elf_bss and last_bss is the bss section.
675  			 */
676  			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
677  			if (k > last_bss) {
678  				last_bss = k;
679  				bss_prot = elf_prot;
680  			}
681  		}
682  	}
683  
684  	/*
685  	 * Now fill out the bss section: first pad the last page from
686  	 * the file up to the page boundary, and zero it from elf_bss
687  	 * up to the end of the page.
688  	 */
689  	if (padzero(elf_bss)) {
690  		error = -EFAULT;
691  		goto out;
692  	}
693  	/*
694  	 * Next, align both the file and mem bss up to the page size,
695  	 * since this is where elf_bss was just zeroed up to, and where
696  	 * last_bss will end after the vm_brk_flags() below.
697  	 */
698  	elf_bss = ELF_PAGEALIGN(elf_bss);
699  	last_bss = ELF_PAGEALIGN(last_bss);
700  	/* Finally, if there is still more bss to allocate, do it. */
701  	if (last_bss > elf_bss) {
702  		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
703  				bss_prot & PROT_EXEC ? VM_EXEC : 0);
704  		if (error)
705  			goto out;
706  	}
707  
708  	error = load_addr;
709  out:
710  	return error;
711  }
712  
713  /*
714   * These are the functions used to load ELF style executables and shared
715   * libraries.  There is no binary dependent code anywhere else.
716   */
717  
718  static int parse_elf_property(const char *data, size_t *off, size_t datasz,
719  			      struct arch_elf_state *arch,
720  			      bool have_prev_type, u32 *prev_type)
721  {
722  	size_t o, step;
723  	const struct gnu_property *pr;
724  	int ret;
725  
726  	if (*off == datasz)
727  		return -ENOENT;
728  
729  	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
730  		return -EIO;
731  	o = *off;
732  	datasz -= *off;
733  
734  	if (datasz < sizeof(*pr))
735  		return -ENOEXEC;
736  	pr = (const struct gnu_property *)(data + o);
737  	o += sizeof(*pr);
738  	datasz -= sizeof(*pr);
739  
740  	if (pr->pr_datasz > datasz)
741  		return -ENOEXEC;
742  
743  	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
744  	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
745  	if (step > datasz)
746  		return -ENOEXEC;
747  
748  	/* Properties are supposed to be unique and sorted on pr_type: */
749  	if (have_prev_type && pr->pr_type <= *prev_type)
750  		return -ENOEXEC;
751  	*prev_type = pr->pr_type;
752  
753  	ret = arch_parse_elf_property(pr->pr_type, data + o,
754  				      pr->pr_datasz, ELF_COMPAT, arch);
755  	if (ret)
756  		return ret;
757  
758  	*off = o + step;
759  	return 0;
760  }
761  
762  #define NOTE_DATA_SZ SZ_1K
763  #define GNU_PROPERTY_TYPE_0_NAME "GNU"
764  #define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
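/*
 * On-disk shape of the note parsed below (illustrative):
 *
 *	struct elf_note { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
 *	"GNU\0"
 *	desc: sequence of { u32 pr_type; u32 pr_datasz; data... },
 *	      each entry padded to ELF_GNU_PROPERTY_ALIGN
 *
 * e.g. an x86-64 binary built with -fcf-protection carries a
 * GNU_PROPERTY_X86_FEATURE_1_AND entry (pr_datasz == 4) whose bits
 * advertise IBT/SHSTK support.
 */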
765  
766  static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
767  				struct arch_elf_state *arch)
768  {
769  	union {
770  		struct elf_note nhdr;
771  		char data[NOTE_DATA_SZ];
772  	} note;
773  	loff_t pos;
774  	ssize_t n;
775  	size_t off, datasz;
776  	int ret;
777  	bool have_prev_type;
778  	u32 prev_type;
779  
780  	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
781  		return 0;
782  
783  	/* load_elf_binary() shouldn't call us unless this is true... */
784  	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
785  		return -ENOEXEC;
786  
787  	/* If the properties are crazy large, that's too bad (for now): */
788  	if (phdr->p_filesz > sizeof(note))
789  		return -ENOEXEC;
790  
791  	pos = phdr->p_offset;
792  	n = kernel_read(f, &note, phdr->p_filesz, &pos);
793  
794  	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
795  	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
796  		return -EIO;
797  
798  	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
799  	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
800  	    strncmp(note.data + sizeof(note.nhdr),
801  		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
802  		return -ENOEXEC;
803  
804  	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
805  		       ELF_GNU_PROPERTY_ALIGN);
806  	if (off > n)
807  		return -ENOEXEC;
808  
809  	if (note.nhdr.n_descsz > n - off)
810  		return -ENOEXEC;
811  	datasz = off + note.nhdr.n_descsz;
812  
813  	have_prev_type = false;
814  	do {
815  		ret = parse_elf_property(note.data, &off, datasz, arch,
816  					 have_prev_type, &prev_type);
817  		have_prev_type = true;
818  	} while (!ret);
819  
820  	return ret == -ENOENT ? 0 : ret;
821  }
822  
823  static int load_elf_binary(struct linux_binprm *bprm)
824  {
825  	struct file *interpreter = NULL; /* to shut gcc up */
826  	unsigned long load_addr = 0, load_bias = 0;
827  	int load_addr_set = 0;
828  	unsigned long error;
829  	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
830  	struct elf_phdr *elf_property_phdata = NULL;
831  	unsigned long elf_bss, elf_brk;
832  	int bss_prot = 0;
833  	int retval, i;
834  	unsigned long elf_entry;
835  	unsigned long e_entry;
836  	unsigned long interp_load_addr = 0;
837  	unsigned long start_code, end_code, start_data, end_data;
838  	unsigned long reloc_func_desc __maybe_unused = 0;
839  	int executable_stack = EXSTACK_DEFAULT;
840  	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
841  	struct elfhdr *interp_elf_ex = NULL;
842  	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
843  	struct mm_struct *mm;
844  	struct pt_regs *regs;
845  
846  	retval = -ENOEXEC;
847  	/* First of all, some simple consistency checks */
848  	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
849  		goto out;
850  
851  	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
852  		goto out;
853  	if (!elf_check_arch(elf_ex))
854  		goto out;
855  	if (elf_check_fdpic(elf_ex))
856  		goto out;
857  	if (!bprm->file->f_op->mmap)
858  		goto out;
859  
860  	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
861  	if (!elf_phdata)
862  		goto out;
863  
864  	elf_ppnt = elf_phdata;
865  	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
866  		char *elf_interpreter;
867  
868  		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
869  			elf_property_phdata = elf_ppnt;
870  			continue;
871  		}
872  
873  		if (elf_ppnt->p_type != PT_INTERP)
874  			continue;
875  
876  		/*
877  		 * This is the program interpreter used for shared libraries -
878  		 * for now assume that this is an a.out format binary.
879  		 */
880  		retval = -ENOEXEC;
881  		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
882  			goto out_free_ph;
883  
884  		retval = -ENOMEM;
885  		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
886  		if (!elf_interpreter)
887  			goto out_free_ph;
888  
889  		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
890  				  elf_ppnt->p_offset);
891  		if (retval < 0)
892  			goto out_free_interp;
893  		/* make sure path is NUL-terminated */
894  		retval = -ENOEXEC;
895  		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
896  			goto out_free_interp;
897  
898  		interpreter = open_exec(elf_interpreter);
899  		kfree(elf_interpreter);
900  		retval = PTR_ERR(interpreter);
901  		if (IS_ERR(interpreter))
902  			goto out_free_ph;
903  
904  		/*
905  		 * If the binary is not readable then enforce mm->dumpable = 0
906  		 * regardless of the interpreter's permissions.
907  		 */
908  		would_dump(bprm, interpreter);
909  
910  		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
911  		if (!interp_elf_ex) {
912  			retval = -ENOMEM;
913  			goto out_free_ph;
914  		}
915  
916  		/* Get the exec headers */
917  		retval = elf_read(interpreter, interp_elf_ex,
918  				  sizeof(*interp_elf_ex), 0);
919  		if (retval < 0)
920  			goto out_free_dentry;
921  
922  		break;
923  
924  out_free_interp:
925  		kfree(elf_interpreter);
926  		goto out_free_ph;
927  	}
928  
929  	elf_ppnt = elf_phdata;
930  	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
931  		switch (elf_ppnt->p_type) {
932  		case PT_GNU_STACK:
933  			if (elf_ppnt->p_flags & PF_X)
934  				executable_stack = EXSTACK_ENABLE_X;
935  			else
936  				executable_stack = EXSTACK_DISABLE_X;
937  			break;
938  
939  		case PT_LOPROC ... PT_HIPROC:
940  			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
941  						  bprm->file, false,
942  						  &arch_state);
943  			if (retval)
944  				goto out_free_dentry;
945  			break;
946  		}
947  
948  	/* Some simple consistency checks for the interpreter */
949  	if (interpreter) {
950  		retval = -ELIBBAD;
951  		/* Not an ELF interpreter */
952  		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
953  			goto out_free_dentry;
954  		/* Verify the interpreter has a valid arch */
955  		if (!elf_check_arch(interp_elf_ex) ||
956  		    elf_check_fdpic(interp_elf_ex))
957  			goto out_free_dentry;
958  
959  		/* Load the interpreter program headers */
960  		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
961  						   interpreter);
962  		if (!interp_elf_phdata)
963  			goto out_free_dentry;
964  
965  		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
966  		elf_property_phdata = NULL;
967  		elf_ppnt = interp_elf_phdata;
968  		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
969  			switch (elf_ppnt->p_type) {
970  			case PT_GNU_PROPERTY:
971  				elf_property_phdata = elf_ppnt;
972  				break;
973  
974  			case PT_LOPROC ... PT_HIPROC:
975  				retval = arch_elf_pt_proc(interp_elf_ex,
976  							  elf_ppnt, interpreter,
977  							  true, &arch_state);
978  				if (retval)
979  					goto out_free_dentry;
980  				break;
981  			}
982  	}
983  
984  	retval = parse_elf_properties(interpreter ?: bprm->file,
985  				      elf_property_phdata, &arch_state);
986  	if (retval)
987  		goto out_free_dentry;
988  
989  	/*
990  	 * Allow arch code to reject the ELF at this point, whilst it's
991  	 * still possible to return an error to the code that invoked
992  	 * the exec syscall.
993  	 */
994  	retval = arch_check_elf(elf_ex,
995  				!!interpreter, interp_elf_ex,
996  				&arch_state);
997  	if (retval)
998  		goto out_free_dentry;
999  
1000  	/* Flush all traces of the currently running executable */
1001  	retval = begin_new_exec(bprm);
1002  	if (retval)
1003  		goto out_free_dentry;
1004  
1005  	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1006  	   may depend on the personality.  */
1007  	SET_PERSONALITY2(*elf_ex, &arch_state);
1008  	if (elf_read_implies_exec(*elf_ex, executable_stack))
1009  		current->personality |= READ_IMPLIES_EXEC;
1010  
1011  	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1012  		current->flags |= PF_RANDOMIZE;
1013  
1014  	setup_new_exec(bprm);
1015  
1016  	/* Do this so that we can load the interpreter, if need be.  We will
1017  	   change some of these later */
1018  	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1019  				 executable_stack);
1020  	if (retval < 0)
1021  		goto out_free_dentry;
1022  
1023  	elf_bss = 0;
1024  	elf_brk = 0;
1025  
1026  	start_code = ~0UL;
1027  	end_code = 0;
1028  	start_data = 0;
1029  	end_data = 0;
1030  
1031  	/* Now we do a little grungy work by mmapping the ELF image into
1032  	   the correct location in memory. */
1033  	for (i = 0, elf_ppnt = elf_phdata;
1034  	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1035  		int elf_prot, elf_flags;
1036  		unsigned long k, vaddr;
1037  		unsigned long total_size = 0;
1038  		unsigned long alignment;
1039  
1040  		if (elf_ppnt->p_type != PT_LOAD)
1041  			continue;
1042  
1043  		if (unlikely(elf_brk > elf_bss)) {
1044  			unsigned long nbyte;
1045  
1046  			/* There was a PT_LOAD segment with p_memsz > p_filesz
1047  			   before this one. Map anonymous pages, if needed,
1048  			   and clear the area.  */
1049  			retval = set_brk(elf_bss + load_bias,
1050  					 elf_brk + load_bias,
1051  					 bss_prot);
1052  			if (retval)
1053  				goto out_free_dentry;
1054  			nbyte = ELF_PAGEOFFSET(elf_bss);
1055  			if (nbyte) {
1056  				nbyte = ELF_MIN_ALIGN - nbyte;
1057  				if (nbyte > elf_brk - elf_bss)
1058  					nbyte = elf_brk - elf_bss;
1059  				if (clear_user((void __user *)elf_bss +
1060  							load_bias, nbyte)) {
1061  					/*
1062  					 * This bss-zeroing can fail if the ELF
1063  					 * file specifies odd protections. So
1064  					 * we don't check the return value
1065  					 */
1066  				}
1067  			}
1068  		}
1069  
1070  		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1071  				     !!interpreter, false);
1072  
1073  		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
1074  
1075  		vaddr = elf_ppnt->p_vaddr;
1076  		/*
1077  		 * If we are loading ET_EXEC or we have already performed
1078  		 * the ET_DYN load_addr calculations, proceed normally.
1079  		 */
1080  		if (elf_ex->e_type == ET_EXEC || load_addr_set) {
1081  			elf_flags |= MAP_FIXED;
1082  		} else if (elf_ex->e_type == ET_DYN) {
1083  			/*
1084  			 * This logic is run once for the first LOAD Program
1085  			 * Header for ET_DYN binaries to calculate the
1086  			 * randomization (load_bias) for all the LOAD
1087  			 * Program Headers, and to calculate the entire
1088  			 * size of the ELF mapping (total_size). (Note that
1089  			 * load_addr_set is set to true later once the
1090  			 * initial mapping is performed.)
1091  			 *
1092  			 * There are effectively two types of ET_DYN
1093  			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
1094  			 * and loaders (ET_DYN without INTERP, since they
1095  			 * _are_ the ELF interpreter). The loaders must
1096  			 * be loaded away from programs since the program
1097  			 * may otherwise collide with the loader (especially
1098  			 * for ET_EXEC which does not have a randomized
1099  			 * position). For example to handle invocations of
1100  			 * "./ld.so someprog" to test out a new version of
1101  			 * the loader, the subsequent program that the
1102  			 * loader loads must avoid the loader itself, so
1103  			 * they cannot share the same load range. Sufficient
1104  			 * room for the brk must be allocated with the
1105  			 * loader as well, since brk must be available with
1106  			 * the loader.
1107  			 *
1108  			 * Therefore, programs are loaded offset from
1109  			 * ELF_ET_DYN_BASE and loaders are loaded into the
1110  			 * independently randomized mmap region (0 load_bias
1111  			 * without MAP_FIXED).
1112  			 */
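			/*
			 * On x86-64, for example, ELF_ET_DYN_BASE works out
			 * to two thirds of the mmap window, so PIE programs
			 * typically land near 0x555555554000 plus the
			 * arch_mmap_rnd() offset applied below.
			 * (Illustrative of one arch; others differ.)
			 */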
1113  			if (interpreter) {
1114  				load_bias = ELF_ET_DYN_BASE;
1115  				if (current->flags & PF_RANDOMIZE)
1116  					load_bias += arch_mmap_rnd();
1117  				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1118  				if (alignment)
1119  					load_bias &= ~(alignment - 1);
1120  				elf_flags |= MAP_FIXED;
1121  			} else
1122  				load_bias = 0;
1123  
1124  			/*
1125  			 * Since load_bias is used for all subsequent loading
1126  			 * calculations, we must lower it by the first vaddr
1127  			 * so that the remaining calculations based on the
1128  			 * ELF vaddrs will be correctly offset. The result
1129  			 * is then page aligned.
1130  			 */
1131  			load_bias = ELF_PAGESTART(load_bias - vaddr);
1132  
1133  			total_size = total_mapping_size(elf_phdata,
1134  							elf_ex->e_phnum);
1135  			if (!total_size) {
1136  				retval = -EINVAL;
1137  				goto out_free_dentry;
1138  			}
1139  		}
1140  
1141  		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
1142  				elf_prot, elf_flags, total_size);
1143  		if (BAD_ADDR(error)) {
1144  			retval = IS_ERR((void *)error) ?
1145  				PTR_ERR((void *)error) : -EINVAL;
1146  			goto out_free_dentry;
1147  		}
1148  
1149  		if (!load_addr_set) {
1150  			load_addr_set = 1;
1151  			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1152  			if (elf_ex->e_type == ET_DYN) {
1153  				load_bias += error -
1154  				             ELF_PAGESTART(load_bias + vaddr);
1155  				load_addr += load_bias;
1156  				reloc_func_desc = load_bias;
1157  			}
1158  		}
1159  		k = elf_ppnt->p_vaddr;
1160  		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1161  			start_code = k;
1162  		if (start_data < k)
1163  			start_data = k;
1164  
1165  		/*
1166  		 * Check to see if the section's size will overflow the
1167  		 * allowed task size. Note that p_filesz must always be
1168  		 * <= p_memsz so it is only necessary to check p_memsz.
1169  		 */
1170  		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1171  		    elf_ppnt->p_memsz > TASK_SIZE ||
1172  		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1173  			/* set_brk can never work. Avoid overflows. */
1174  			retval = -EINVAL;
1175  			goto out_free_dentry;
1176  		}
1177  
1178  		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1179  
1180  		if (k > elf_bss)
1181  			elf_bss = k;
1182  		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1183  			end_code = k;
1184  		if (end_data < k)
1185  			end_data = k;
1186  		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1187  		if (k > elf_brk) {
1188  			bss_prot = elf_prot;
1189  			elf_brk = k;
1190  		}
1191  	}
1192  
1193  	e_entry = elf_ex->e_entry + load_bias;
1194  	elf_bss += load_bias;
1195  	elf_brk += load_bias;
1196  	start_code += load_bias;
1197  	end_code += load_bias;
1198  	start_data += load_bias;
1199  	end_data += load_bias;
1200  
1201  	/* Calling set_brk effectively mmaps the pages that we need
1202  	 * for the bss and break sections.  We must do this before
1203  	 * mapping in the interpreter, to make sure it doesn't wind
1204  	 * up getting placed where the bss needs to go.
1205  	 */
1206  	retval = set_brk(elf_bss, elf_brk, bss_prot);
1207  	if (retval)
1208  		goto out_free_dentry;
1209  	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1210  		retval = -EFAULT; /* Nobody gets to see this, but.. */
1211  		goto out_free_dentry;
1212  	}
1213  
1214  	if (interpreter) {
1215  		elf_entry = load_elf_interp(interp_elf_ex,
1216  					    interpreter,
1217  					    load_bias, interp_elf_phdata,
1218  					    &arch_state);
1219  		if (!IS_ERR((void *)elf_entry)) {
1220  			/*
1221  			 * load_elf_interp() returns relocation
1222  			 * adjustment
1223  			 */
1224  			interp_load_addr = elf_entry;
1225  			elf_entry += interp_elf_ex->e_entry;
1226  		}
1227  		if (BAD_ADDR(elf_entry)) {
1228  			retval = IS_ERR((void *)elf_entry) ?
1229  					(int)elf_entry : -EINVAL;
1230  			goto out_free_dentry;
1231  		}
1232  		reloc_func_desc = interp_load_addr;
1233  
1234  		allow_write_access(interpreter);
1235  		fput(interpreter);
1236  
1237  		kfree(interp_elf_ex);
1238  		kfree(interp_elf_phdata);
1239  	} else {
1240  		elf_entry = e_entry;
1241  		if (BAD_ADDR(elf_entry)) {
1242  			retval = -EINVAL;
1243  			goto out_free_dentry;
1244  		}
1245  	}
1246  
1247  	kfree(elf_phdata);
1248  
1249  	set_binfmt(&elf_format);
1250  
1251  #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1252  	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1253  	if (retval < 0)
1254  		goto out;
1255  #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1256  
1257  	retval = create_elf_tables(bprm, elf_ex,
1258  			  load_addr, interp_load_addr, e_entry);
1259  	if (retval < 0)
1260  		goto out;
1261  
1262  	mm = current->mm;
1263  	mm->end_code = end_code;
1264  	mm->start_code = start_code;
1265  	mm->start_data = start_data;
1266  	mm->end_data = end_data;
1267  	mm->start_stack = bprm->p;
1268  
1269  	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1270  		/*
1271  		 * For architectures with ELF randomization, when executing
1272  		 * a loader directly (i.e. no interpreter listed in ELF
1273  		 * headers), move the brk area out of the mmap region
1274  		 * (since it grows up, and may collide early with the stack
1275  		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1276  		 */
1277  		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1278  		    elf_ex->e_type == ET_DYN && !interpreter) {
1279  			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1280  		}
1281  
1282  		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1283  #ifdef compat_brk_randomized
1284  		current->brk_randomized = 1;
1285  #endif
1286  	}
1287  
1288  	if (current->personality & MMAP_PAGE_ZERO) {
1289  		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1290  		   and some applications "depend" upon this behavior.
1291  		   Since we do not have the power to recompile these, we
1292  		   emulate the SVr4 behavior. Sigh. */
1293  		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1294  				MAP_FIXED | MAP_PRIVATE, 0);
1295  	}
1296  
1297  	regs = current_pt_regs();
1298  #ifdef ELF_PLAT_INIT
1299  	/*
1300  	 * The ABI may specify that certain registers be set up in special
1301  	 * ways (on i386 %edx is the address of a DT_FINI function, for
1302  	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1303  	 * that the e_entry field is the address of the function descriptor
1304  	 * for the startup routine, rather than the address of the startup
1305  	 * routine itself.  This macro performs whatever initialization to
1306  	 * the regs structure is required as well as any relocations to the
1307  	 * function descriptor entries when executing dynamically linked apps.
1308  	 */
1309  	ELF_PLAT_INIT(regs, reloc_func_desc);
1310  #endif
1311  
1312  	finalize_exec(bprm);
1313  	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1314  	retval = 0;
1315  out:
1316  	return retval;
1317  
1318  	/* error cleanup */
1319  out_free_dentry:
1320  	kfree(interp_elf_ex);
1321  	kfree(interp_elf_phdata);
1322  	allow_write_access(interpreter);
1323  	if (interpreter)
1324  		fput(interpreter);
1325  out_free_ph:
1326  	kfree(elf_phdata);
1327  	goto out;
1328  }
1329  
1330  #ifdef CONFIG_USELIB
1331  /* This is really simpleminded and specialized - we are loading an
1332     a.out-style library that has an ELF header. */
1333  static int load_elf_library(struct file *file)
1334  {
1335  	struct elf_phdr *elf_phdata;
1336  	struct elf_phdr *eppnt;
1337  	unsigned long elf_bss, bss, len;
1338  	int retval, error, i, j;
1339  	struct elfhdr elf_ex;
1340  
1341  	error = -ENOEXEC;
1342  	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1343  	if (retval < 0)
1344  		goto out;
1345  
1346  	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1347  		goto out;
1348  
1349  	/* First of all, some simple consistency checks */
1350  	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1351  	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1352  		goto out;
1353  	if (elf_check_fdpic(&elf_ex))
1354  		goto out;
1355  
1356  	/* Now read in all of the header information */
1357  
1358  	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1359  	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1360  
1361  	error = -ENOMEM;
1362  	elf_phdata = kmalloc(j, GFP_KERNEL);
1363  	if (!elf_phdata)
1364  		goto out;
1365  
1366  	eppnt = elf_phdata;
1367  	error = -ENOEXEC;
1368  	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1369  	if (retval < 0)
1370  		goto out_free_ph;
1371  
1372  	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1373  		if ((eppnt + i)->p_type == PT_LOAD)
1374  			j++;
1375  	if (j != 1)
1376  		goto out_free_ph;
1377  
1378  	while (eppnt->p_type != PT_LOAD)
1379  		eppnt++;
1380  
1381  	/* Now use mmap to map the library into memory. */
1382  	error = vm_mmap(file,
1383  			ELF_PAGESTART(eppnt->p_vaddr),
1384  			(eppnt->p_filesz +
1385  			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1386  			PROT_READ | PROT_WRITE | PROT_EXEC,
1387  			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1388  			(eppnt->p_offset -
1389  			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1390  	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1391  		goto out_free_ph;
1392  
1393  	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1394  	if (padzero(elf_bss)) {
1395  		error = -EFAULT;
1396  		goto out_free_ph;
1397  	}
1398  
1399  	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1400  	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1401  	if (bss > len) {
1402  		error = vm_brk(len, bss - len);
1403  		if (error)
1404  			goto out_free_ph;
1405  	}
1406  	error = 0;
1407  
1408  out_free_ph:
1409  	kfree(elf_phdata);
1410  out:
1411  	return error;
1412  }
1413  #endif /* #ifdef CONFIG_USELIB */
1414  
1415  #ifdef CONFIG_ELF_CORE
1416  /*
1417   * ELF core dumper
1418   *
1419   * Modelled on fs/exec.c:aout_core_dump()
1420   * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1421   */
1422  
1423  /* An ELF note in memory */
1424  struct memelfnote
1425  {
1426  	const char *name;
1427  	int type;
1428  	unsigned int datasz;
1429  	void *data;
1430  };
1431  
1432  static int notesize(struct memelfnote *en)
1433  {
1434  	int sz;
1435  
1436  	sz = sizeof(struct elf_note);
1437  	sz += roundup(strlen(en->name) + 1, 4);
1438  	sz += roundup(en->datasz, 4);
1439  
1440  	return sz;
1441  }
1442  
1443  static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1444  {
1445  	struct elf_note en;
1446  	en.n_namesz = strlen(men->name) + 1;
1447  	en.n_descsz = men->datasz;
1448  	en.n_type = men->type;
1449  
1450  	return dump_emit(cprm, &en, sizeof(en)) &&
1451  	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1452  	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1453  }
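/*
 * Emitted layout (illustrative): for name "CORE" and a 13-byte desc,
 * writenote() produces
 *
 *	struct elf_note { n_namesz = 5, n_descsz = 13, n_type }
 *	"CORE\0" + 3 pad bytes		(roundup(5, 4)  == 8)
 *	13 desc bytes + 3 pad bytes	(roundup(13, 4) == 16)
 *
 * which matches the 12 + 8 + 16 bytes notesize() accounts for.
 */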
1454  
1455  static void fill_elf_header(struct elfhdr *elf, int segs,
1456  			    u16 machine, u32 flags)
1457  {
1458  	memset(elf, 0, sizeof(*elf));
1459  
1460  	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1461  	elf->e_ident[EI_CLASS] = ELF_CLASS;
1462  	elf->e_ident[EI_DATA] = ELF_DATA;
1463  	elf->e_ident[EI_VERSION] = EV_CURRENT;
1464  	elf->e_ident[EI_OSABI] = ELF_OSABI;
1465  
1466  	elf->e_type = ET_CORE;
1467  	elf->e_machine = machine;
1468  	elf->e_version = EV_CURRENT;
1469  	elf->e_phoff = sizeof(struct elfhdr);
1470  	elf->e_flags = flags;
1471  	elf->e_ehsize = sizeof(struct elfhdr);
1472  	elf->e_phentsize = sizeof(struct elf_phdr);
1473  	elf->e_phnum = segs;
1474  }
1475  
1476  static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1477  {
1478  	phdr->p_type = PT_NOTE;
1479  	phdr->p_offset = offset;
1480  	phdr->p_vaddr = 0;
1481  	phdr->p_paddr = 0;
1482  	phdr->p_filesz = sz;
1483  	phdr->p_memsz = 0;
1484  	phdr->p_flags = 0;
1485  	phdr->p_align = 0;
1486  }
1487  
1488  static void fill_note(struct memelfnote *note, const char *name, int type,
1489  		unsigned int sz, void *data)
1490  {
1491  	note->name = name;
1492  	note->type = type;
1493  	note->datasz = sz;
1494  	note->data = data;
1495  }
1496  
1497  /*
1498   * fill up all the fields in prstatus from the given task struct, except
1499   * registers which need to be filled up separately.
1500   */
1501  static void fill_prstatus(struct elf_prstatus_common *prstatus,
1502  		struct task_struct *p, long signr)
1503  {
1504  	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1505  	prstatus->pr_sigpend = p->pending.signal.sig[0];
1506  	prstatus->pr_sighold = p->blocked.sig[0];
1507  	rcu_read_lock();
1508  	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1509  	rcu_read_unlock();
1510  	prstatus->pr_pid = task_pid_vnr(p);
1511  	prstatus->pr_pgrp = task_pgrp_vnr(p);
1512  	prstatus->pr_sid = task_session_vnr(p);
1513  	if (thread_group_leader(p)) {
1514  		struct task_cputime cputime;
1515  
1516  		/*
1517  		 * This is the record for the group leader.  It shows the
1518  		 * group-wide total, not its individual thread total.
1519  		 */
1520  		thread_group_cputime(p, &cputime);
1521  		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1522  		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1523  	} else {
1524  		u64 utime, stime;
1525  
1526  		task_cputime(p, &utime, &stime);
1527  		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1528  		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1529  	}
1530  
1531  	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1532  	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1533  }
1534  
1535  static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1536  		       struct mm_struct *mm)
1537  {
1538  	const struct cred *cred;
1539  	unsigned int i, len;
1540  
1541  	/* first copy the parameters from user space */
1542  	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1543  
1544  	len = mm->arg_end - mm->arg_start;
1545  	if (len >= ELF_PRARGSZ)
1546  		len = ELF_PRARGSZ-1;
1547  	if (copy_from_user(&psinfo->pr_psargs,
1548  		           (const char __user *)mm->arg_start, len))
1549  		return -EFAULT;
1550  	for (i = 0; i < len; i++)
1551  		if (psinfo->pr_psargs[i] == 0)
1552  			psinfo->pr_psargs[i] = ' ';
1553  	psinfo->pr_psargs[len] = 0;
1554  
1555  	rcu_read_lock();
1556  	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1557  	rcu_read_unlock();
1558  	psinfo->pr_pid = task_pid_vnr(p);
1559  	psinfo->pr_pgrp = task_pgrp_vnr(p);
1560  	psinfo->pr_sid = task_session_vnr(p);
1561  
1562  	i = p->state ? ffz(~p->state) + 1 : 0;
1563  	psinfo->pr_state = i;
1564  	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1565  	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1566  	psinfo->pr_nice = task_nice(p);
1567  	psinfo->pr_flag = p->flags;
1568  	rcu_read_lock();
1569  	cred = __task_cred(p);
1570  	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1571  	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1572  	rcu_read_unlock();
1573  	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1574  
1575  	return 0;
1576  }
1577  
1578  static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1579  {
1580  	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1581  	int i = 0;
1582  	do
1583  		i += 2;
1584  	while (auxv[i - 2] != AT_NULL);
1585  	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1586  }
1587  
1588  static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1589  		const kernel_siginfo_t *siginfo)
1590  {
1591  	copy_siginfo_to_external(csigdata, siginfo);
1592  	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1593  }
1594  
1595  #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1596  /*
1597   * Format of NT_FILE note:
1598   *
1599   * long count     -- how many files are mapped
1600   * long page_size -- units for file_ofs
1601   * array of [COUNT] elements of
1602   *   long start
1603   *   long end
1604   *   long file_ofs
1605   * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1606   */
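/*
 * For example, a process with two file-backed mappings might emit:
 *
 *	count = 2, page_size = PAGE_SIZE
 *	{ 0x400000, 0x401000, 0 }	-- text page, file offset 0
 *	{ 0x601000, 0x602000, 1 }	-- data page, one page into the file
 *	"/bin/true\0/bin/true\0"
 *
 * (file_ofs is in page_size units, i.e. vma->vm_pgoff; illustrative
 * addresses and paths.)
 */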
1607  static int fill_files_note(struct memelfnote *note)
1608  {
1609  	struct mm_struct *mm = current->mm;
1610  	struct vm_area_struct *vma;
1611  	unsigned count, size, names_ofs, remaining, n;
1612  	user_long_t *data;
1613  	user_long_t *start_end_ofs;
1614  	char *name_base, *name_curpos;
1615  
1616  	/* *Estimated* file count and total data size needed */
1617  	count = mm->map_count;
1618  	if (count > UINT_MAX / 64)
1619  		return -EINVAL;
1620  	size = count * 64;
1621  
1622  	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1623   alloc:
1624  	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1625  		return -EINVAL;
1626  	size = round_up(size, PAGE_SIZE);
1627  	/*
1628  	 * "size" can be 0 here legitimately.
1629  	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1630  	 */
1631  	data = kvmalloc(size, GFP_KERNEL);
1632  	if (ZERO_OR_NULL_PTR(data))
1633  		return -ENOMEM;
1634  
1635  	start_end_ofs = data + 2;
1636  	name_base = name_curpos = ((char *)data) + names_ofs;
1637  	remaining = size - names_ofs;
1638  	count = 0;
1639  	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1640  		struct file *file;
1641  		const char *filename;
1642  
1643  		file = vma->vm_file;
1644  		if (!file)
1645  			continue;
1646  		filename = file_path(file, name_curpos, remaining);
1647  		if (IS_ERR(filename)) {
1648  			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1649  				kvfree(data);
1650  				size = size * 5 / 4;
1651  				goto alloc;
1652  			}
1653  			continue;
1654  		}
1655  
1656  		/* file_path() fills at the end, move name down */
1657  		/* n = strlen(filename) + 1: */
1658  		n = (name_curpos + remaining) - filename;
1659  		remaining = filename - name_curpos;
1660  		memmove(name_curpos, filename, n);
1661  		name_curpos += n;
1662  
1663  		*start_end_ofs++ = vma->vm_start;
1664  		*start_end_ofs++ = vma->vm_end;
1665  		*start_end_ofs++ = vma->vm_pgoff;
1666  		count++;
1667  	}
1668  
1669  	/* Now we know exact count of files, can store it */
1670  	data[0] = count;
1671  	data[1] = PAGE_SIZE;
1672  	/*
1673  	 * Count usually is less than mm->map_count,
1674  	 * we need to move filenames down.
1675  	 */
1676  	n = mm->map_count - count;
1677  	if (n != 0) {
1678  		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1679  		memmove(name_base - shift_bytes, name_base,
1680  			name_curpos - name_base);
1681  		name_curpos -= shift_bytes;
1682  	}
1683  
1684  	size = name_curpos - (char *)data;
1685  	fill_note(note, "CORE", NT_FILE, size, data);
1686  	return 0;
1687  }
1688  
1689  #ifdef CORE_DUMP_USE_REGSET
1690  #include <linux/regset.h>
1691  
1692  struct elf_thread_core_info {
1693  	struct elf_thread_core_info *next;
1694  	struct task_struct *task;
1695  	struct elf_prstatus prstatus;
1696  	struct memelfnote notes[];
1697  };
1698  
1699  struct elf_note_info {
1700  	struct elf_thread_core_info *thread;
1701  	struct memelfnote psinfo;
1702  	struct memelfnote signote;
1703  	struct memelfnote auxv;
1704  	struct memelfnote files;
1705  	user_siginfo_t csigdata;
1706  	size_t size;
1707  	int thread_notes;
1708  };
1709  
1710  /*
1711   * When a regset has a writeback hook, we call it on each thread before
1712   * dumping user memory.  On register window machines, this makes sure the
1713   * user memory backing the register data is up to date before we read it.
1714   */
1715  static void do_thread_regset_writeback(struct task_struct *task,
1716  				       const struct user_regset *regset)
1717  {
1718  	if (regset->writeback)
1719  		regset->writeback(task, regset, 1);
1720  }
1721  
1722  #ifndef PRSTATUS_SIZE
1723  #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1724  #endif
1725  
1726  #ifndef SET_PR_FPVALID
1727  #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1728  #endif
1729  
1730  static int fill_thread_core_info(struct elf_thread_core_info *t,
1731  				 const struct user_regset_view *view,
1732  				 long signr, size_t *total)
1733  {
1734  	unsigned int i;
1735  
1736  	/*
1737  	 * NT_PRSTATUS is the one special case, because the regset data
1738  	 * goes into the pr_reg field inside the note contents, rather
1739  	 * than being the whole note contents.  We fill the rest in here.
1740  	 * We assume that regset 0 is NT_PRSTATUS.
1741  	 */
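	/*
	 * Sketch of why: the note payload is the whole struct elf_prstatus,
	 * and regset 0's data lands inside it:
	 *
	 *	struct elf_prstatus {
	 *		struct elf_prstatus_common common;
	 *		elf_gregset_t pr_reg;	 <- regset_get() fills this
	 *		int pr_fpvalid;		 <- SET_PR_FPVALID() below
	 *	};
	 */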
1742  	fill_prstatus(&t->prstatus.common, t->task, signr);
1743  	regset_get(t->task, &view->regsets[0],
1744  		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1745  
1746  	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1747  		  PRSTATUS_SIZE, &t->prstatus);
1748  	*total += notesize(&t->notes[0]);
1749  
1750  	do_thread_regset_writeback(t->task, &view->regsets[0]);
1751  
1752  	/*
1753  	 * Each of the other regsets might generate a note too.  For each regset
1754  	 * that has no core_note_type or is inactive, we leave t->notes[i]
1755  	 * all zero and we'll know to skip writing it later.
1756  	 */
1757  	for (i = 1; i < view->n; ++i) {
1758  		const struct user_regset *regset = &view->regsets[i];
1759  		int note_type = regset->core_note_type;
1760  		bool is_fpreg = note_type == NT_PRFPREG;
1761  		void *data;
1762  		int ret;
1763  
1764  		do_thread_regset_writeback(t->task, regset);
1765  		if (!note_type) /* not for coredumps */
1766  			continue;
1767  		if (regset->active && regset->active(t->task, regset) <= 0)
1768  			continue;
1769  
1770  		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1771  		if (ret < 0)
1772  			continue;
1773  
1774  		if (is_fpreg)
1775  			SET_PR_FPVALID(&t->prstatus);
1776  
1777  		fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
1778  			  note_type, ret, data);
1779  
1780  		*total += notesize(&t->notes[i]);
1781  	}
1782  
1783  	return 1;
1784  }
1785  
1786  static int fill_note_info(struct elfhdr *elf, int phdrs,
1787  			  struct elf_note_info *info,
1788  			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1789  {
1790  	struct task_struct *dump_task = current;
1791  	const struct user_regset_view *view = task_user_regset_view(dump_task);
1792  	struct elf_thread_core_info *t;
1793  	struct elf_prpsinfo *psinfo;
1794  	struct core_thread *ct;
1795  	unsigned int i;
1796  
1797  	info->size = 0;
1798  	info->thread = NULL;
1799  
1800  	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1801  	if (psinfo == NULL) {
1802  		info->psinfo.data = NULL; /* So we don't free this wrongly */
1803  		return 0;
1804  	}
1805  
1806  	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1807  
1808  	/*
1809  	 * Figure out how many notes we're going to need for each thread.
1810  	 */
1811  	info->thread_notes = 0;
1812  	for (i = 0; i < view->n; ++i)
1813  		if (view->regsets[i].core_note_type != 0)
1814  			++info->thread_notes;
1815  
1816  	/*
1817  	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1818  	 * since it is our one special case.
1819  	 */
1820  	if (unlikely(info->thread_notes == 0) ||
1821  	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1822  		WARN_ON(1);
1823  		return 0;
1824  	}
1825  
1826  	/*
1827  	 * Initialize the ELF file header.
1828  	 */
1829  	fill_elf_header(elf, phdrs,
1830  			view->e_machine, view->e_flags);
1831  
1832  	/*
1833  	 * Allocate a structure for each thread.
1834  	 */
1835  	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1836  		t = kzalloc(offsetof(struct elf_thread_core_info,
1837  				     notes[info->thread_notes]),
1838  			    GFP_KERNEL);
1839  		if (unlikely(!t))
1840  			return 0;
1841  
1842  		t->task = ct->task;
1843  		if (ct->task == dump_task || !info->thread) {
1844  			t->next = info->thread;
1845  			info->thread = t;
1846  		} else {
1847  			/*
1848  			 * Make sure to keep the original task at
1849  			 * the head of the list.
1850  			 */
1851  			t->next = info->thread->next;
1852  			info->thread->next = t;
1853  		}
1854  	}
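	/*
	 * E.g. if the dumper D and secondary threads T1, T2 are walked in
	 * that order, the list built above reads D, T2, T1: only the head
	 * position (the dumping task) is guaranteed.
	 */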
1855  
1856  	/*
1857  	 * Now fill in each thread's information.
1858  	 */
1859  	for (t = info->thread; t != NULL; t = t->next)
1860  		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1861  			return 0;
1862  
1863  	/*
1864  	 * Fill in the two process-wide notes.
1865  	 */
1866  	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1867  	info->size += notesize(&info->psinfo);
1868  
1869  	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1870  	info->size += notesize(&info->signote);
1871  
1872  	fill_auxv_note(&info->auxv, current->mm);
1873  	info->size += notesize(&info->auxv);
1874  
1875  	if (fill_files_note(&info->files) == 0)
1876  		info->size += notesize(&info->files);
1877  
1878  	return 1;
1879  }
1880  
1881  static size_t get_note_info_size(struct elf_note_info *info)
1882  {
1883  	return info->size;
1884  }
1885  
1886  /*
1887   * Write all the notes for each thread.  When writing the first thread, the
1888   * process-wide notes are interleaved after the first thread-specific note.
1889   */
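/*
 * For a two-thread process the resulting stream looks like (sketch):
 *
 *	NT_PRSTATUS(T0), NT_PRPSINFO, NT_SIGINFO, NT_AUXV, [NT_FILE],
 *	T0's other regset notes..., NT_PRSTATUS(T1), T1's other notes...
 */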
1890  static int write_note_info(struct elf_note_info *info,
1891  			   struct coredump_params *cprm)
1892  {
1893  	bool first = true;
1894  	struct elf_thread_core_info *t = info->thread;
1895  
1896  	do {
1897  		int i;
1898  
1899  		if (!writenote(&t->notes[0], cprm))
1900  			return 0;
1901  
1902  		if (first && !writenote(&info->psinfo, cprm))
1903  			return 0;
1904  		if (first && !writenote(&info->signote, cprm))
1905  			return 0;
1906  		if (first && !writenote(&info->auxv, cprm))
1907  			return 0;
1908  		if (first && info->files.data &&
1909  				!writenote(&info->files, cprm))
1910  			return 0;
1911  
1912  		for (i = 1; i < info->thread_notes; ++i)
1913  			if (t->notes[i].data &&
1914  			    !writenote(&t->notes[i], cprm))
1915  				return 0;
1916  
1917  		first = false;
1918  		t = t->next;
1919  	} while (t);
1920  
1921  	return 1;
1922  }
1923  
1924  static void free_note_info(struct elf_note_info *info)
1925  {
1926  	struct elf_thread_core_info *threads = info->thread;
1927  	while (threads) {
1928  		unsigned int i;
1929  		struct elf_thread_core_info *t = threads;
1930  		threads = t->next;
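		/* notes[0] points into t->prstatus itself and is never freed. */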
1931  		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1932  		for (i = 1; i < info->thread_notes; ++i)
1933  			kfree(t->notes[i].data);
1934  		kfree(t);
1935  	}
1936  	kfree(info->psinfo.data);
1937  	kvfree(info->files.data);
1938  }
1939  
1940  #else
1941  
1942  /* Here is the structure in which status of each thread is captured. */
1943  /* Here is the structure in which the status of each thread is captured. */
1944  {
1945  	struct list_head list;
1946  	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1947  	elf_fpregset_t fpu;		/* NT_PRFPREG */
1948  	struct task_struct *thread;
1949  	struct memelfnote notes[3];
1950  	int num_notes;
1951  };
1952  
1953  /*
1954   * In order to add the specific thread information for the ELF file format,
1955   * we need to keep a linked list of every thread's pr_status and then create
1956   * a single section for them in the final core file.
1957   */
1958  static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1959  {
1960  	int sz = 0;
1961  	struct task_struct *p = t->thread;
1962  	t->num_notes = 0;
1963  
1964  	fill_prstatus(&t->prstatus.common, p, signr);
1965  	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1966  
1967  	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1968  		  &(t->prstatus));
1969  	t->num_notes++;
1970  	sz += notesize(&t->notes[0]);
1971  
1972  	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1973  								&t->fpu))) {
1974  		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1975  			  &(t->fpu));
1976  		t->num_notes++;
1977  		sz += notesize(&t->notes[1]);
1978  	}
1979  	return sz;
1980  }
1981  
1982  struct elf_note_info {
1983  	struct memelfnote *notes;
1984  	struct memelfnote *notes_files;
1985  	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1986  	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1987  	struct list_head thread_list;
1988  	elf_fpregset_t *fpu;
1989  	user_siginfo_t csigdata;
1990  	int thread_status_size;
1991  	int numnote;
1992  };
1993  
1994  static int elf_note_info_init(struct elf_note_info *info)
1995  {
1996  	memset(info, 0, sizeof(*info));
1997  	INIT_LIST_HEAD(&info->thread_list);
1998  
1999  	/* Allocate space for ELF notes */
2000  	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2001  	if (!info->notes)
2002  		return 0;
2003  	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2004  	if (!info->psinfo)
2005  		return 0;
2006  	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2007  	if (!info->prstatus)
2008  		return 0;
2009  	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2010  	if (!info->fpu)
2011  		return 0;
2012  	return 1;
2013  }
2014  
2015  static int fill_note_info(struct elfhdr *elf, int phdrs,
2016  			  struct elf_note_info *info,
2017  			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2018  {
2019  	struct core_thread *ct;
2020  	struct elf_thread_status *ets;
2021  
2022  	if (!elf_note_info_init(info))
2023  		return 0;
2024  
2025  	for (ct = current->mm->core_state->dumper.next;
2026  					ct; ct = ct->next) {
2027  		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2028  		if (!ets)
2029  			return 0;
2030  
2031  		ets->thread = ct->task;
2032  		list_add(&ets->list, &info->thread_list);
2033  	}
2034  
2035  	list_for_each_entry(ets, &info->thread_list, list) {
2036  		int sz;
2037  
2038  		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2039  		info->thread_status_size += sz;
2040  	}
2041  	/* now collect the dump for the current task */
2042  	memset(info->prstatus, 0, sizeof(*info->prstatus));
2043  	fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
2044  	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2045  
2046  	/* Set up header */
2047  	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2048  
2049  	/*
2050  	 * Set up the notes in similar form to SVR4 core dumps made
2051  	 * with info from their /proc.
2052  	 */
2053  
2054  	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2055  		  sizeof(*info->prstatus), info->prstatus);
2056  	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2057  	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2058  		  sizeof(*info->psinfo), info->psinfo);
2059  
2060  	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2061  	fill_auxv_note(info->notes + 3, current->mm);
2062  	info->numnote = 4;
2063  
2064  	if (fill_files_note(info->notes + info->numnote) == 0) {
2065  		info->notes_files = info->notes + info->numnote;
2066  		info->numnote++;
2067  	}
2068  
2069  	/* Try to dump the FPU. */
2070  	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2071  							       info->fpu);
2072  	if (info->prstatus->pr_fpvalid)
2073  		fill_note(info->notes + info->numnote++,
2074  			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2075  	return 1;
2076  }
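
/*
 * The resulting fixed note order is: NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO,
 * NT_AUXV, then optionally NT_FILE and NT_PRFPREG; the per-thread status
 * notes collected above follow when write_note_info() runs.
 */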
2077  
2078  static size_t get_note_info_size(struct elf_note_info *info)
2079  {
2080  	int sz = 0;
2081  	int i;
2082  
2083  	for (i = 0; i < info->numnote; i++)
2084  		sz += notesize(info->notes + i);
2085  
2086  	sz += info->thread_status_size;
2087  
2088  	return sz;
2089  }
2090  
2091  static int write_note_info(struct elf_note_info *info,
2092  			   struct coredump_params *cprm)
2093  {
2094  	struct elf_thread_status *ets;
2095  	int i;
2096  
2097  	for (i = 0; i < info->numnote; i++)
2098  		if (!writenote(info->notes + i, cprm))
2099  			return 0;
2100  
2101  	/* write out the thread status notes section */
2102  	list_for_each_entry(ets, &info->thread_list, list) {
2103  		for (i = 0; i < ets->num_notes; i++)
2104  			if (!writenote(&ets->notes[i], cprm))
2105  				return 0;
2106  	}
2107  
2108  	return 1;
2109  }
2110  
2111  static void free_note_info(struct elf_note_info *info)
2112  {
2113  	while (!list_empty(&info->thread_list)) {
2114  		struct list_head *tmp = info->thread_list.next;
2115  		list_del(tmp);
2116  		kfree(list_entry(tmp, struct elf_thread_status, list));
2117  	}
2118  
2119  	/* Free data possibly allocated by fill_files_note(): */
2120  	if (info->notes_files)
2121  		kvfree(info->notes_files->data);
2122  
2123  	kfree(info->prstatus);
2124  	kfree(info->psinfo);
2125  	kfree(info->notes);
2126  	kfree(info->fpu);
2127  }
2128  
2129  #endif
2130  
2131  static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2132  			     elf_addr_t e_shoff, int segs)
2133  {
2134  	elf->e_shoff = e_shoff;
2135  	elf->e_shentsize = sizeof(*shdr4extnum);
2136  	elf->e_shnum = 1;
2137  	elf->e_shstrndx = SHN_UNDEF;
2138  
2139  	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2140  
2141  	shdr4extnum->sh_type = SHT_NULL;
2142  	shdr4extnum->sh_size = elf->e_shnum;
2143  	shdr4extnum->sh_link = elf->e_shstrndx;
2144  	shdr4extnum->sh_info = segs;
2145  }
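
/*
 * Sketch of the consumer side: when e_phnum == PN_XNUM (0xffff), the real
 * segment count is recovered from sh_info of section header 0, exactly as
 * filled in above (helper name is hypothetical):
 *
 *	static int real_phnum(const struct elfhdr *eh, const struct elf_shdr *sh0)
 *	{
 *		return eh->e_phnum == PN_XNUM ? sh0->sh_info : eh->e_phnum;
 *	}
 */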
2146  
2147  /*
2148   * Actual dumper
2149   *
2150   * This is a two-pass process; first we find the offsets of the bits,
2151   * and then they are actually written out.  If we run out of core limit
2152   * and then they are actually written out.  If we run out of core limit,
2153   */
2154  static int elf_core_dump(struct coredump_params *cprm)
2155  {
2156  	int has_dumped = 0;
2157  	int vma_count, segs, i;
2158  	size_t vma_data_size;
2159  	struct elfhdr elf;
2160  	loff_t offset = 0, dataoff;
2161  	struct elf_note_info info = { };
2162  	struct elf_phdr *phdr4note = NULL;
2163  	struct elf_shdr *shdr4extnum = NULL;
2164  	Elf_Half e_phnum;
2165  	elf_addr_t e_shoff;
2166  	struct core_vma_metadata *vma_meta;
2167  
2168  	if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
2169  		return 0;
2170  
2171  	/*
2172  	 * The number of segs is recorded in the ELF header as a 16-bit value.
2173  	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2174  	 */
2175  	segs = vma_count + elf_core_extra_phdrs();
2176  
2177  	/* for notes section */
2178  	segs++;
2179  
2180  	/* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2181  	 * this, the kernel supports extended numbering. See
2182  	 * include/linux/elf.h for further information. */
2183  	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2184  
2185  	/*
2186  	 * Collect all the non-memory information about the process for the
2187  	 * notes.  This also sets up the file header.
2188  	 */
2189  	if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2190  		goto end_coredump;
2191  
2192  	has_dumped = 1;
2193  
2194  	offset += sizeof(elf);				/* Elf header */
2195  	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2196  
2197  	/* Write notes phdr entry */
2198  	{
2199  		size_t sz = get_note_info_size(&info);
2200  
2201  		/* For cell spufs */
2202  		sz += elf_coredump_extra_notes_size();
2203  
2204  		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2205  		if (!phdr4note)
2206  			goto end_coredump;
2207  
2208  		fill_elf_note_phdr(phdr4note, sz, offset);
2209  		offset += sz;
2210  	}
2211  
2212  	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2213  
2214  	offset += vma_data_size;
2215  	offset += elf_core_extra_data_size();
2216  	e_shoff = offset;
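	/*
	 * At this point the overall core file layout is fixed (sketch):
	 *
	 *	[ ELF header            ] sizeof(elf)
	 *	[ program headers       ] segs * sizeof(struct elf_phdr)
	 *	[ notes                 ] sz, incl. extra (spufs) notes
	 *	[ pad to ELF page       ] up to dataoff
	 *	[ PT_LOAD segment data  ] vma_data_size
	 *	[ extra dump data       ]
	 *	[ extnum section header ] at e_shoff, only if e_phnum == PN_XNUM
	 */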
2217  
2218  	if (e_phnum == PN_XNUM) {
2219  		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2220  		if (!shdr4extnum)
2221  			goto end_coredump;
2222  		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2223  	}
2224  
2225  	offset = dataoff;
2226  
2227  	if (!dump_emit(cprm, &elf, sizeof(elf)))
2228  		goto end_coredump;
2229  
2230  	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2231  		goto end_coredump;
2232  
2233  	/* Write program headers for the segment dumps */
2234  	for (i = 0; i < vma_count; i++) {
2235  		struct core_vma_metadata *meta = vma_meta + i;
2236  		struct elf_phdr phdr;
2237  
2238  		phdr.p_type = PT_LOAD;
2239  		phdr.p_offset = offset;
2240  		phdr.p_vaddr = meta->start;
2241  		phdr.p_paddr = 0;
2242  		phdr.p_filesz = meta->dump_size;
2243  		phdr.p_memsz = meta->end - meta->start;
2244  		offset += phdr.p_filesz;
2245  		phdr.p_flags = 0;
2246  		if (meta->flags & VM_READ)
2247  			phdr.p_flags |= PF_R;
2248  		if (meta->flags & VM_WRITE)
2249  			phdr.p_flags |= PF_W;
2250  		if (meta->flags & VM_EXEC)
2251  			phdr.p_flags |= PF_X;
2252  		phdr.p_align = ELF_EXEC_PAGESIZE;
2253  
2254  		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2255  			goto end_coredump;
2256  	}
2257  
2258  	if (!elf_core_write_extra_phdrs(cprm, offset))
2259  		goto end_coredump;
2260  
2261  	/* write out the notes section */
2262  	if (!write_note_info(&info, cprm))
2263  		goto end_coredump;
2264  
2265  	/* For cell spufs */
2266  	if (elf_coredump_extra_notes_write(cprm))
2267  		goto end_coredump;
2268  
2269  	/* Align to page */
2270  	dump_skip_to(cprm, dataoff);
2271  
2272  	for (i = 0; i < vma_count; i++) {
2273  		struct core_vma_metadata *meta = vma_meta + i;
2274  
2275  		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2276  			goto end_coredump;
2277  	}
2278  
2279  	if (!elf_core_write_extra_data(cprm))
2280  		goto end_coredump;
2281  
2282  	if (e_phnum == PN_XNUM) {
2283  		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2284  			goto end_coredump;
2285  	}
2286  
2287  end_coredump:
2288  	free_note_info(&info);
2289  	kfree(shdr4extnum);
2290  	kvfree(vma_meta);
2291  	kfree(phdr4note);
2292  	return has_dumped;
2293  }
2294  
2295  #endif		/* CONFIG_ELF_CORE */
2296  
2297  static int __init init_elf_binfmt(void)
2298  {
2299  	register_binfmt(&elf_format);
2300  	return 0;
2301  }
2302  
2303  static void __exit exit_elf_binfmt(void)
2304  {
2305  	/* Remove the ELF loader. */
2306  	unregister_binfmt(&elf_format);
2307  }
2308  
2309  core_initcall(init_elf_binfmt);
2310  module_exit(exit_elf_binfmt);
2311  MODULE_LICENSE("GPL");
2312