// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr,
		unsigned long e_entry)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}
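	/*
	 * Note: userspace (e.g. glibc) reads AT_RANDOM to seed its
	 * stack-protector canary and pointer-mangling guards, which is
	 * why these bytes are copied onto the new process stack.
	 */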
	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry. */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;
	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end (this unmapping is needed for ELF images with holes).
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return(map_addr);
}

#endif /* !elf_map */

static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval, err = -1;
	unsigned int size;
	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
	if (retval < 0) {
		err = retval;
		goto out;
	}

	/* Success! */
	err = 0;
out:
	if (err) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED_NOREPLACE;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;
			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;
	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	return ret == -ENOENT ? 0 : ret;
}

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;
		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_ph;
		}

		/* Get the exec headers */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);
	install_exec_creds(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * If we are loading ET_EXEC or we have already performed
		 * the ET_DYN load_addr calculations, proceed normally.
		 */
		if (elf_ex->e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers, and to calculate the entire
			 * size of the ELF mapping (total_size). (Note that
			 * load_addr_set is set to true later once the
			 * initial mapping is performed.)
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				elf_flags |= MAP_FIXED;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	e_entry = elf_ex->e_entry + load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex,
			  load_addr, interp_load_addr, e_entry);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
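/*
 * Note: uselib(2) is a legacy interface; modern C libraries load shared
 * objects via mmap() and do not use it.
 */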
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
	if (retval < 0)
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&elf_ex))
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
	if (retval < 0)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;

	/*
	 * Assume that all vmas with a .name op should always be dumped.
	 * If this changes, a new vm_ops field can easily be added.
	 */
	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
		return true;

	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}
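/*
 * The MMF_DUMP_* filter bits tested below come from the process's
 * /proc/<pid>/coredump_filter setting and select which kinds of
 * mappings are written to the core file.
 */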
/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
			goto whole;
		return 0;
	}

	/* Hugetlb memory check */
	if (is_vm_hugetlb_page(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		if (unlikely(get_user(word, header)))
			word = 0;
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;
		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
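/*
 * The NT_FILE note records the file-backed mappings of the dumped process
 * so that post-mortem tools can associate core segments with the files
 * they were mapped from.
 */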
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;

	/* *Estimated* file count and total data size needed */
	count = mm->map_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		return -EINVAL;
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can be 0 here legitimately.
	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct file *file;
		const char *filename;

		file = vma->vm_file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = vma->vm_start;
		*start_end_ofs++ = vma->vm_end;
		*start_end_ofs++ = vma->vm_pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than mm->map_count,
	 * we need to move filenames down.
	 */
	n = mm->map_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};
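/*
 * In this regset-based dumper each thread contributes one note per regset
 * exposed by task_user_regset_view(); regset 0 is assumed to be NT_PRSTATUS.
 */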
/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory. On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;
	unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the regset in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
				    &t->prstatus.pr_reg, NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset) > 0)) {
			int ret;
			size_t size = regset_size(t->task, regset);
			void *data = kzalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					SET_PR_FPVALID(&t->prstatus,
						       1, regset0_size);
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL) {
		info->psinfo.data = NULL; /* So we don't free this wrongly */
		return 0;
	}

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL) {
		info->psinfo.data = NULL; /* So we don't free this wrongly */
		return 0;
	}

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	if (fill_files_note(&info->files) == 0)
		info->size += notesize(&info->files);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
		    !writenote(&info->files, cprm))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	kvfree(info->files.data);
}
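
/*
 * For reference, writenote() emits each memelfnote above as a standard ELF
 * note record: a header of three 32-bit words (namesz, descsz, type), then
 * the name and the descriptor, each padded to a 4-byte boundary -- which is
 * exactly what notesize() accounts for.  A minimal userspace reader might
 * step through the PT_NOTE segment like this (a sketch only; names are
 * hypothetical):
 *
 *	struct nhdr { uint32_t namesz, descsz, type; };
 *	const struct nhdr *nh = p;
 *	const char *name = (const char *)(nh + 1);
 *	const void *desc = name + ((nh->namesz + 3) & ~3u);
 *	p = (const char *)desc + ((nh->descsz + 3) & ~3u);    <-- next record
 */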

#else

/*
 * Here is the structure in which the status of each thread is captured.
 */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the ELF file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

struct elf_note_info {
	struct memelfnote *notes;
	struct memelfnote *notes_files;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	user_siginfo_t csigdata;
	int thread_status_size;
	int numnote;
};

static int elf_note_info_init(struct elf_note_info *info)
{
	memset(info, 0, sizeof(*info));
	INIT_LIST_HEAD(&info->thread_list);

	/* Allocate space for ELF notes */
	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif
	return 1;
}
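
/*
 * Note on sizing: the fixed 8-slot notes array above covers every note this
 * path can emit for the dumping thread -- NT_PRSTATUS, NT_PRPSINFO, the
 * siginfo and auxv notes, an optional NT_FILE note, an optional NT_PRFPREG
 * note and, where ELF_CORE_COPY_XFPREGS is defined, one extended-FPU note --
 * i.e. at most seven, leaving one slot to spare.
 */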

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
{
	struct core_thread *ct;
	struct elf_thread_status *ets;

	if (!elf_note_info_init(info))
		return 0;

	for (ct = current->mm->core_state->dumper.next;
	     ct; ct = ct->next) {
		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
		if (!ets)
			return 0;

		ets->thread = ct->task;
		list_add(&ets->list, &info->thread_list);
	}

	list_for_each_entry(ets, &info->thread_list, list) {
		int sz;

		sz = elf_dump_thread_status(siginfo->si_signo, ets);
		info->thread_status_size += sz;
	}
	/* now collect the dump for the current thread */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, siginfo->si_signo);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
	fill_auxv_note(info->notes + 3, current->mm);
	info->numnote = 4;

	if (fill_files_note(info->notes + info->numnote) == 0) {
		info->notes_files = info->notes + info->numnote;
		info->numnote++;
	}

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	int sz = 0;
	int i;

	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;

	return sz;
}

static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	struct elf_thread_status *ets;
	int i;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, cprm))
			return 0;

	/* write out the thread status notes section */
	list_for_each_entry(ets, &info->thread_list, list) {
		for (i = 0; i < ets->num_notes; i++)
			if (!writenote(&ets->notes[i], cprm))
				return 0;
	}

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	/* Free data possibly allocated by fill_files_note(): */
	if (info->notes_files)
		kvfree(info->notes_files->data);

	kfree(info->prstatus);
	kfree(info->psinfo);
	kfree(info->notes);
	kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(info->xfpu);
#endif
}

#endif

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}
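
/*
 * first_vma() and next_vma() are meant to be used together, as the dump
 * loops in elf_core_dump() below do:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		... visit vma ...
 *
 * which walks every vma on the mm's list once and then, if one exists,
 * the gate vma.
 */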

/*
 * Helper function for iterating across a vma list.  It ensures that the
 * caller will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
				       struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run into the core size
 * limit, we just truncate.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	int segs, i;
	size_t vma_data_size = 0;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr elf;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;
	elf_addr_t *vma_filesz = NULL;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change the
	 * map_count or the mmap / vma pages are now blocked in do_exit on
	 * the current task finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated.  So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/*
	 * The number of segs is recorded in the ELF header as a 16-bit value.
	 * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	gate_vma = get_gate_vma(current->mm);
	if (gate_vma != NULL)
		segs++;

	/* for notes section */
	segs++;

	/*
	 * If segs > PN_XNUM(0xffff), then e_phnum overflows.  To avoid
	 * this, the kernel supports extended numbering.  Have a look at
	 * include/linux/elf.h for further information.
	 */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
		goto end_coredump;

	has_dumped = 1;

	offset += sizeof(elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
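
	/*
	 * From here on the overall layout of the core file is (a sketch;
	 * every piece after the headers is optional or variable in size):
	 *
	 *	[ ELF header ]
	 *	[ program headers: PT_NOTE, one PT_LOAD per dumped vma,
	 *	  plus any arch-specific extra phdrs ]
	 *	[ note data ]
	 *	[ padding up to ELF_EXEC_PAGESIZE ]          <-- dataoff
	 *	[ vma contents, vma_filesz[i] bytes each ]
	 *	[ arch-specific extra data, if any ]
	 *	[ extra section header, only if e_phnum == PN_XNUM ]
	 */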

	/*
	 * A process with no vmas will get ZERO_SIZE_PTR here.  Let the
	 * coredump continue so that at least the register state is captured.
	 */
	vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
			      GFP_KERNEL);
	if (!vma_filesz)
		goto end_coredump;

	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long dump_size;

		dump_size = vma_dump_size(vma, cprm->mm_flags);
		vma_filesz[i++] = dump_size;
		vma_data_size += dump_size;
	}

	offset += vma_data_size;
	offset += elf_core_extra_data_size();
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
	}

	offset = dataoff;

	if (!dump_emit(cprm, &elf, sizeof(elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = vma_filesz[i++];
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	if (!dump_skip(cprm, dataoff - cprm->pos))
		goto end_coredump;

	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_filesz[i++];

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			int stop;

			page = get_dump_page(addr);
			if (page) {
				void *kaddr = kmap(page);
				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
				kunmap(page);
				put_page(page);
			} else
				stop = !dump_skip(cprm, PAGE_SIZE);
			if (stop)
				goto end_coredump;
		}
	}
	dump_truncate(cprm);

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	free_note_info(&info);
	kfree(shdr4extnum);
	kvfree(vma_filesz);
	kfree(phdr4note);
	return has_dumped;
}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");