1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/fs/binfmt_elf.c 4 * 5 * These are the functions used to load ELF format executables as used 6 * on SVr4 machines. Information on the format may be found in the book 7 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support 8 * Tools". 9 * 10 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). 11 */ 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/fs.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/errno.h> 19 #include <linux/signal.h> 20 #include <linux/binfmts.h> 21 #include <linux/string.h> 22 #include <linux/file.h> 23 #include <linux/slab.h> 24 #include <linux/personality.h> 25 #include <linux/elfcore.h> 26 #include <linux/init.h> 27 #include <linux/highuid.h> 28 #include <linux/compiler.h> 29 #include <linux/highmem.h> 30 #include <linux/pagemap.h> 31 #include <linux/vmalloc.h> 32 #include <linux/security.h> 33 #include <linux/random.h> 34 #include <linux/elf.h> 35 #include <linux/elf-randomize.h> 36 #include <linux/utsname.h> 37 #include <linux/coredump.h> 38 #include <linux/sched.h> 39 #include <linux/sched/coredump.h> 40 #include <linux/sched/task_stack.h> 41 #include <linux/sched/cputime.h> 42 #include <linux/cred.h> 43 #include <linux/dax.h> 44 #include <linux/uaccess.h> 45 #include <asm/param.h> 46 #include <asm/page.h> 47 48 #ifndef user_long_t 49 #define user_long_t long 50 #endif 51 #ifndef user_siginfo_t 52 #define user_siginfo_t siginfo_t 53 #endif 54 55 /* That's for binfmt_elf_fdpic to deal with */ 56 #ifndef elf_check_fdpic 57 #define elf_check_fdpic(ex) false 58 #endif 59 60 static int load_elf_binary(struct linux_binprm *bprm); 61 62 #ifdef CONFIG_USELIB 63 static int load_elf_library(struct file *); 64 #else 65 #define load_elf_library NULL 66 #endif 67 68 /* 69 * If we don't support core dumping, then supply a NULL so we 70 * don't even try. 
71 */ 72 #ifdef CONFIG_ELF_CORE 73 static int elf_core_dump(struct coredump_params *cprm); 74 #else 75 #define elf_core_dump NULL 76 #endif 77 78 #if ELF_EXEC_PAGESIZE > PAGE_SIZE 79 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE 80 #else 81 #define ELF_MIN_ALIGN PAGE_SIZE 82 #endif 83 84 #ifndef ELF_CORE_EFLAGS 85 #define ELF_CORE_EFLAGS 0 86 #endif 87 88 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) 89 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) 90 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) 91 92 static struct linux_binfmt elf_format = { 93 .module = THIS_MODULE, 94 .load_binary = load_elf_binary, 95 .load_shlib = load_elf_library, 96 .core_dump = elf_core_dump, 97 .min_coredump = ELF_EXEC_PAGESIZE, 98 }; 99 100 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) 101 102 static int set_brk(unsigned long start, unsigned long end, int prot) 103 { 104 start = ELF_PAGEALIGN(start); 105 end = ELF_PAGEALIGN(end); 106 if (end > start) { 107 /* 108 * Map the last of the bss segment. 109 * If the header is requesting these pages to be 110 * executable, honour that (ppc32 needs this). 111 */ 112 int error = vm_brk_flags(start, end - start, 113 prot & PROT_EXEC ? VM_EXEC : 0); 114 if (error) 115 return error; 116 } 117 current->mm->start_brk = current->mm->brk = end; 118 return 0; 119 } 120 121 /* We need to explicitly zero any fractional pages 122 after the data section (i.e. bss). 
This would
   contain the junk from the file that should not
   be in memory.

   padzero() clears from elf_bss up to the next ELF_MIN_ALIGN boundary
   (nothing is cleared when elf_bss is already aligned).  Returns
   -EFAULT if clear_user() reports a failure, 0 otherwise.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
/*
 * STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                          direction the stack grows.
 * STACK_ROUND(sp, items) - the address 'items' slots further on, rounded
 *                          to a 16-byte boundary, as an unsigned long.
 * STACK_ALLOC(sp, len)   - moves sp by len and evaluates to the start of
 *                          the reserved area.  NOTE: modifies its 'sp'
 *                          argument.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 * @bprm:             exec parameters; bprm->p is the current stack position
 *                    and is updated to the final, 16-byte rounded value.
 * @exec:             ELF header of the program (source of AT_PHDR offset,
 *                    AT_PHNUM and AT_ENTRY).
 * @load_addr:        base added to e_phoff to form AT_PHDR.
 * @interp_load_addr: value reported as AT_BASE.
 *
 * In order: copies the platform strings and 16 random bytes to the user
 * stack, builds the auxiliary vector in mm->saved_auxv, then writes argc,
 * the argv[] and envp[] pointer arrays (each NULL terminated) and finally
 * the auxiliary vector.  Also records arg_start/arg_end and
 * env_start/env_end in the mm.
 *
 * Returns 0 on success, -EFAULT when a user-space access or the stack
 * extension fails, and -EINVAL when an argument or environment string has
 * length 0 or exceeds MAX_ARG_STRLEN according to strnlen_user().
 */
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
	/* Each NEW_AUX_ENT() appends one (id, value) pair, i.e. two slots. */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	/* argv[] + NULL, envp[] + NULL, plus one slot for argc itself. */
	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		/* p is advanced by len, so the strings are walked back to back. */
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	current->mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	current->mm->env_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

/*
 * elf_map() - map the file-backed part of one program header.
 * @filep:      file the segment is read from
 * @addr:       requested address; rounded with ELF_PAGESTART() before use
 * @eppnt:      program header describing the segment
 * @prot:       protection bits passed to vm_mmap()
 * @type:       mapping flags passed to vm_mmap()
 * @total_size: when non-zero, size of the whole image; that many bytes are
 *              mapped and everything past this segment is unmapped again
 *
 * Returns whatever vm_mmap() returned (an address, or an error value that
 * satisfies BAD_ADDR()), or the rounded @addr when the segment has no
 * file-backed bytes to map.
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	/* Extend the mapping backwards to the start of the page holding p_vaddr. */
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	* total_size is the size of the ELF (interpreter) image.
	* The _first_ mmap needs to know the full size, otherwise
	* randomization might put this image into an overlapping
	* position with the ELF binary image. (since size < total_size)
	* So we first map the 'big' image - and unmap the remainder at
	* the end. (which unmap is needed for ELF images with holes.)
	*/
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	/* Log only; the error value is still handed back to the caller below. */
	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return(map_addr);
}

#endif /* !elf_map */

/*
 * total_mapping_size() - size of the span covered by the PT_LOAD entries.
 * @cmds: program header table
 * @nr:   number of entries in @cmds
 *
 * Returns the distance from the ELF_PAGESTART()-rounded p_vaddr of the
 * first PT_LOAD entry to the end (p_vaddr + p_memsz) of the last PT_LOAD
 * entry, where "first" and "last" refer to table order.  Returns 0 when
 * the table contains no PT_LOAD entry.
 */
static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/*
 * elf_read() - read exactly @len bytes from @file at offset @pos into @buf.
 *
 * Returns 0 when kernel_read() delivered all @len bytes, the negative value
 * kernel_read() returned on error, or -EIO on a short read.
 */
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
426 */ 427 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, 428 struct file *elf_file) 429 { 430 struct elf_phdr *elf_phdata = NULL; 431 int retval, err = -1; 432 unsigned int size; 433 434 /* 435 * If the size of this structure has changed, then punt, since 436 * we will be doing the wrong thing. 437 */ 438 if (elf_ex->e_phentsize != sizeof(struct elf_phdr)) 439 goto out; 440 441 /* Sanity check the number of program headers... */ 442 /* ...and their total size. */ 443 size = sizeof(struct elf_phdr) * elf_ex->e_phnum; 444 if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN) 445 goto out; 446 447 elf_phdata = kmalloc(size, GFP_KERNEL); 448 if (!elf_phdata) 449 goto out; 450 451 /* Read in the program headers */ 452 retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); 453 if (retval < 0) { 454 err = retval; 455 goto out; 456 } 457 458 /* Success! */ 459 err = 0; 460 out: 461 if (err) { 462 kfree(elf_phdata); 463 elf_phdata = NULL; 464 } 465 return elf_phdata; 466 } 467 468 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE 469 470 /** 471 * struct arch_elf_state - arch-specific ELF loading state 472 * 473 * This structure is used to preserve architecture specific data during 474 * the loading of an ELF file, throughout the checking of architecture 475 * specific ELF headers & through to the point where the ELF load is 476 * known to be proceeding (ie. SET_PERSONALITY). 477 * 478 * This implementation is a dummy for architectures which require no 479 * specific state. 480 */ 481 struct arch_elf_state { 482 }; 483 484 #define INIT_ARCH_ELF_STATE {} 485 486 /** 487 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header 488 * @ehdr: The main ELF header 489 * @phdr: The program header to check 490 * @elf: The open ELF file 491 * @is_interp: True if the phdr is from the interpreter of the ELF being 492 * loaded, else false. 493 * @state: Architecture-specific state preserved throughout the process 494 * of loading the ELF. 
495 * 496 * Inspects the program header phdr to validate its correctness and/or 497 * suitability for the system. Called once per ELF program header in the 498 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its 499 * interpreter. 500 * 501 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load 502 * with that return code. 503 */ 504 static inline int arch_elf_pt_proc(struct elfhdr *ehdr, 505 struct elf_phdr *phdr, 506 struct file *elf, bool is_interp, 507 struct arch_elf_state *state) 508 { 509 /* Dummy implementation, always proceed */ 510 return 0; 511 } 512 513 /** 514 * arch_check_elf() - check an ELF executable 515 * @ehdr: The main ELF header 516 * @has_interp: True if the ELF has an interpreter, else false. 517 * @interp_ehdr: The interpreter's ELF header 518 * @state: Architecture-specific state preserved throughout the process 519 * of loading the ELF. 520 * 521 * Provides a final opportunity for architecture code to reject the loading 522 * of the ELF & cause an exec syscall to return an error. This is called after 523 * all program headers to be checked by arch_elf_pt_proc have been. 524 * 525 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load 526 * with that return code. 527 */ 528 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp, 529 struct elfhdr *interp_ehdr, 530 struct arch_elf_state *state) 531 { 532 /* Dummy implementation, always proceed */ 533 return 0; 534 } 535 536 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */ 537 538 static inline int make_prot(u32 p_flags) 539 { 540 int prot = 0; 541 542 if (p_flags & PF_R) 543 prot |= PROT_READ; 544 if (p_flags & PF_W) 545 prot |= PROT_WRITE; 546 if (p_flags & PF_X) 547 prot |= PROT_EXEC; 548 return prot; 549 } 550 551 /* This is much more generalized than the library routine read function, 552 so we keep this separate. 
Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

/*
 * load_elf_interp() - map the PT_LOAD segments of the ELF interpreter.
 * @interp_elf_ex:     ELF header of the interpreter
 * @interpreter:       open file of the interpreter
 * @no_base:           when non-zero and the interpreter is ET_DYN, the
 *                     first segment is requested at page 0 of the image
 *                     (load_addr = -vaddr), leaving placement to the mapper
 * @interp_elf_phdata: program header table of the interpreter
 *
 * After mapping the segments, the tail of the last file-backed page is
 * zeroed and any remaining bss is mapped with vm_brk_flags().
 *
 * Returns load_addr (the offset added to the interpreter's p_vaddr values)
 * on success.  On failure returns an error value: ~0UL when the initial
 * consistency checks fail, -EINVAL when there is no PT_LOAD segment, the
 * value returned by elf_map() when it satisfies BAD_ADDR(), -ENOMEM when a
 * segment does not fit below TASK_SIZE, -EFAULT when padzero() fails, or
 * the error returned by vm_brk_flags().
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = make_prot(eppnt->p_flags);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED_NOREPLACE;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			/* Only the first elf_map() call is given the full image size. */
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			/* The first ET_DYN mapping fixes the base for all later segments. */
			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				/* Remember the protection of the segment that ends highest. */
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
*/

/*
 * load_elf_binary() - load_binary handler of elf_format.
 * @bprm: exec parameters; bprm->buf holds the start of the file (read here
 *        as the ELF header) and bprm->file is the image itself.
 *
 * Outline of the steps performed below:
 *  1. validate the ELF header and read the program headers;
 *  2. if a PT_INTERP entry exists, open the named interpreter and read
 *     its ELF header;
 *  3. scan PT_GNU_STACK and PT_LOPROC..PT_HIPROC entries (program and
 *     interpreter) and give arch code a chance to reject the image;
 *  4. flush the old executable, set personality and set up the stack;
 *  5. map every PT_LOAD segment, then the bss/brk area;
 *  6. map the interpreter (if any) and pick the entry point;
 *  7. build the initial stack with create_elf_tables(), record the code /
 *     data / stack bounds in the mm and call start_thread().
 *
 * Returns 0 once start_thread() has been called, otherwise the error code
 * of the step that failed.
 */
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	/* Both ELF headers are kept in one heap allocation, freed at 'out'. */
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct pt_regs *regs;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (elf_check_fdpic(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	/* A NULL result leaves retval at -ENOEXEC whatever the underlying cause. */
	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	/* Look for a PT_INTERP entry; only the first one found is used. */
	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		/* On failure 'interpreter' is an error pointer, so skip out_free_dentry. */
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		/* Get the exec headers */
		retval = elf_read(interpreter, &loc->interp_elf_ex,
				  sizeof(loc->interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

		/* Error exit used while the path buffer is still allocated. */
out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex) ||
		    elf_check_fdpic(&loc->interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(&loc->elf_ex,
				!!interpreter, &loc->interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(loc->elf_ex, &arch_state);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);
	install_exec_creds(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	/* Bounds are tracked as min/max over the PT_LOAD segments below. */
	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags);

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * If we are loading ET_EXEC or we have already performed
		 * the ET_DYN load_addr calculations, proceed normally.
		 */
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers, and to calculate the entire
			 * size of the ELF mapping (total_size). (Note that
			 * load_addr_set is set to true later once the
			 * initial mapping is performed.)
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				elf_flags |= MAP_FIXED;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			total_size = total_mapping_size(elf_phdata,
							loc->elf_ex.e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				/* Fold in the difference between requested and actual address. */
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	/* Everything tracked so far is in ELF vaddr terms; apply the bias once. */
	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		/* The file reference is dropped here; the pointer is only tested, never dereferenced, below. */
		allow_write_access(interpreter);
		fput(interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	/* From here on failures go to 'out': the header tables and interpreter are already released. */
	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0)
		goto out;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    loc->elf_ex.e_type == ET_DYN && !interpreter)
			current->mm->brk = current->mm->start_brk =
				ELF_ET_DYN_BASE;

		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		/* The result is stored in 'error' but not examined afterwards. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
	/* Releases the interpreter and its headers, then falls through. */
out_free_dentry:
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header.
*/ 1171 static int load_elf_library(struct file *file) 1172 { 1173 struct elf_phdr *elf_phdata; 1174 struct elf_phdr *eppnt; 1175 unsigned long elf_bss, bss, len; 1176 int retval, error, i, j; 1177 struct elfhdr elf_ex; 1178 1179 error = -ENOEXEC; 1180 retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0); 1181 if (retval < 0) 1182 goto out; 1183 1184 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) 1185 goto out; 1186 1187 /* First of all, some simple consistency checks */ 1188 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || 1189 !elf_check_arch(&elf_ex) || !file->f_op->mmap) 1190 goto out; 1191 if (elf_check_fdpic(&elf_ex)) 1192 goto out; 1193 1194 /* Now read in all of the header information */ 1195 1196 j = sizeof(struct elf_phdr) * elf_ex.e_phnum; 1197 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */ 1198 1199 error = -ENOMEM; 1200 elf_phdata = kmalloc(j, GFP_KERNEL); 1201 if (!elf_phdata) 1202 goto out; 1203 1204 eppnt = elf_phdata; 1205 error = -ENOEXEC; 1206 retval = elf_read(file, eppnt, j, elf_ex.e_phoff); 1207 if (retval < 0) 1208 goto out_free_ph; 1209 1210 for (j = 0, i = 0; i<elf_ex.e_phnum; i++) 1211 if ((eppnt + i)->p_type == PT_LOAD) 1212 j++; 1213 if (j != 1) 1214 goto out_free_ph; 1215 1216 while (eppnt->p_type != PT_LOAD) 1217 eppnt++; 1218 1219 /* Now use mmap to map the library into memory. 
*/ 1220 error = vm_mmap(file, 1221 ELF_PAGESTART(eppnt->p_vaddr), 1222 (eppnt->p_filesz + 1223 ELF_PAGEOFFSET(eppnt->p_vaddr)), 1224 PROT_READ | PROT_WRITE | PROT_EXEC, 1225 MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE, 1226 (eppnt->p_offset - 1227 ELF_PAGEOFFSET(eppnt->p_vaddr))); 1228 if (error != ELF_PAGESTART(eppnt->p_vaddr)) 1229 goto out_free_ph; 1230 1231 elf_bss = eppnt->p_vaddr + eppnt->p_filesz; 1232 if (padzero(elf_bss)) { 1233 error = -EFAULT; 1234 goto out_free_ph; 1235 } 1236 1237 len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); 1238 bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); 1239 if (bss > len) { 1240 error = vm_brk(len, bss - len); 1241 if (error) 1242 goto out_free_ph; 1243 } 1244 error = 0; 1245 1246 out_free_ph: 1247 kfree(elf_phdata); 1248 out: 1249 return error; 1250 } 1251 #endif /* #ifdef CONFIG_USELIB */ 1252 1253 #ifdef CONFIG_ELF_CORE 1254 /* 1255 * ELF core dumper 1256 * 1257 * Modelled on fs/exec.c:aout_core_dump() 1258 * Jeremy Fitzhardinge <jeremy@sw.oz.au> 1259 */ 1260 1261 /* 1262 * The purpose of always_dump_vma() is to make sure that special kernel mappings 1263 * that are useful for post-mortem analysis are included in every core dump. 1264 * In that way we ensure that the core dump is fully interpretable later 1265 * without matching up the same kernel and hardware config to see what PC values 1266 * meant. These special mappings include - vDSO, vsyscall, and other 1267 * architecture specific mappings 1268 */ 1269 static bool always_dump_vma(struct vm_area_struct *vma) 1270 { 1271 /* Any vsyscall mappings? */ 1272 if (vma == get_gate_vma(vma->vm_mm)) 1273 return true; 1274 1275 /* 1276 * Assume that all vmas with a .name op should always be dumped. 1277 * If this changes, a new vm_ops field can easily be added. 
1278 */ 1279 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 1280 return true; 1281 1282 /* 1283 * arch_vma_name() returns non-NULL for special architecture mappings, 1284 * such as vDSO sections. 1285 */ 1286 if (arch_vma_name(vma)) 1287 return true; 1288 1289 return false; 1290 } 1291 1292 /* 1293 * Decide what to dump of a segment, part, all or none. 1294 */ 1295 static unsigned long vma_dump_size(struct vm_area_struct *vma, 1296 unsigned long mm_flags) 1297 { 1298 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1299 1300 /* always dump the vdso and vsyscall sections */ 1301 if (always_dump_vma(vma)) 1302 goto whole; 1303 1304 if (vma->vm_flags & VM_DONTDUMP) 1305 return 0; 1306 1307 /* support for DAX */ 1308 if (vma_is_dax(vma)) { 1309 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) 1310 goto whole; 1311 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) 1312 goto whole; 1313 return 0; 1314 } 1315 1316 /* Hugetlb memory check */ 1317 if (vma->vm_flags & VM_HUGETLB) { 1318 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 1319 goto whole; 1320 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) 1321 goto whole; 1322 return 0; 1323 } 1324 1325 /* Do not dump I/O mapped devices or special mappings */ 1326 if (vma->vm_flags & VM_IO) 1327 return 0; 1328 1329 /* By default, dump shared memory if mapped from an anonymous file. */ 1330 if (vma->vm_flags & VM_SHARED) { 1331 if (file_inode(vma->vm_file)->i_nlink == 0 ? 1332 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1333 goto whole; 1334 return 0; 1335 } 1336 1337 /* Dump segments that have been written to. */ 1338 if (vma->anon_vma && FILTER(ANON_PRIVATE)) 1339 goto whole; 1340 if (vma->vm_file == NULL) 1341 return 0; 1342 1343 if (FILTER(MAPPED_PRIVATE)) 1344 goto whole; 1345 1346 /* 1347 * If this looks like the beginning of a DSO or executable mapping, 1348 * check for an ELF header. 
If we find one, dump the first page to 1349 * aid in determining what was mapped here. 1350 */ 1351 if (FILTER(ELF_HEADERS) && 1352 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { 1353 u32 __user *header = (u32 __user *) vma->vm_start; 1354 u32 word; 1355 mm_segment_t fs = get_fs(); 1356 /* 1357 * Doing it this way gets the constant folded by GCC. 1358 */ 1359 union { 1360 u32 cmp; 1361 char elfmag[SELFMAG]; 1362 } magic; 1363 BUILD_BUG_ON(SELFMAG != sizeof word); 1364 magic.elfmag[EI_MAG0] = ELFMAG0; 1365 magic.elfmag[EI_MAG1] = ELFMAG1; 1366 magic.elfmag[EI_MAG2] = ELFMAG2; 1367 magic.elfmag[EI_MAG3] = ELFMAG3; 1368 /* 1369 * Switch to the user "segment" for get_user(), 1370 * then put back what elf_core_dump() had in place. 1371 */ 1372 set_fs(USER_DS); 1373 if (unlikely(get_user(word, header))) 1374 word = 0; 1375 set_fs(fs); 1376 if (word == magic.cmp) 1377 return PAGE_SIZE; 1378 } 1379 1380 #undef FILTER 1381 1382 return 0; 1383 1384 whole: 1385 return vma->vm_end - vma->vm_start; 1386 } 1387 1388 /* An ELF note in memory */ 1389 struct memelfnote 1390 { 1391 const char *name; 1392 int type; 1393 unsigned int datasz; 1394 void *data; 1395 }; 1396 1397 static int notesize(struct memelfnote *en) 1398 { 1399 int sz; 1400 1401 sz = sizeof(struct elf_note); 1402 sz += roundup(strlen(en->name) + 1, 4); 1403 sz += roundup(en->datasz, 4); 1404 1405 return sz; 1406 } 1407 1408 static int writenote(struct memelfnote *men, struct coredump_params *cprm) 1409 { 1410 struct elf_note en; 1411 en.n_namesz = strlen(men->name) + 1; 1412 en.n_descsz = men->datasz; 1413 en.n_type = men->type; 1414 1415 return dump_emit(cprm, &en, sizeof(en)) && 1416 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && 1417 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); 1418 } 1419 1420 static void fill_elf_header(struct elfhdr *elf, int segs, 1421 u16 machine, u32 flags) 1422 { 1423 memset(elf, 0, sizeof(*elf)); 1424 1425 memcpy(elf->e_ident, ELFMAG, SELFMAG); 
1426 elf->e_ident[EI_CLASS] = ELF_CLASS; 1427 elf->e_ident[EI_DATA] = ELF_DATA; 1428 elf->e_ident[EI_VERSION] = EV_CURRENT; 1429 elf->e_ident[EI_OSABI] = ELF_OSABI; 1430 1431 elf->e_type = ET_CORE; 1432 elf->e_machine = machine; 1433 elf->e_version = EV_CURRENT; 1434 elf->e_phoff = sizeof(struct elfhdr); 1435 elf->e_flags = flags; 1436 elf->e_ehsize = sizeof(struct elfhdr); 1437 elf->e_phentsize = sizeof(struct elf_phdr); 1438 elf->e_phnum = segs; 1439 } 1440 1441 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) 1442 { 1443 phdr->p_type = PT_NOTE; 1444 phdr->p_offset = offset; 1445 phdr->p_vaddr = 0; 1446 phdr->p_paddr = 0; 1447 phdr->p_filesz = sz; 1448 phdr->p_memsz = 0; 1449 phdr->p_flags = 0; 1450 phdr->p_align = 0; 1451 } 1452 1453 static void fill_note(struct memelfnote *note, const char *name, int type, 1454 unsigned int sz, void *data) 1455 { 1456 note->name = name; 1457 note->type = type; 1458 note->datasz = sz; 1459 note->data = data; 1460 } 1461 1462 /* 1463 * fill up all the fields in prstatus from the given task struct, except 1464 * registers which need to be filled up separately. 1465 */ 1466 static void fill_prstatus(struct elf_prstatus *prstatus, 1467 struct task_struct *p, long signr) 1468 { 1469 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1470 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1471 prstatus->pr_sighold = p->blocked.sig[0]; 1472 rcu_read_lock(); 1473 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); 1474 rcu_read_unlock(); 1475 prstatus->pr_pid = task_pid_vnr(p); 1476 prstatus->pr_pgrp = task_pgrp_vnr(p); 1477 prstatus->pr_sid = task_session_vnr(p); 1478 if (thread_group_leader(p)) { 1479 struct task_cputime cputime; 1480 1481 /* 1482 * This is the record for the group leader. It shows the 1483 * group-wide total, not its individual thread total. 
1484 */ 1485 thread_group_cputime(p, &cputime); 1486 prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime); 1487 prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime); 1488 } else { 1489 u64 utime, stime; 1490 1491 task_cputime(p, &utime, &stime); 1492 prstatus->pr_utime = ns_to_kernel_old_timeval(utime); 1493 prstatus->pr_stime = ns_to_kernel_old_timeval(stime); 1494 } 1495 1496 prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime); 1497 prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime); 1498 } 1499 1500 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, 1501 struct mm_struct *mm) 1502 { 1503 const struct cred *cred; 1504 unsigned int i, len; 1505 1506 /* first copy the parameters from user space */ 1507 memset(psinfo, 0, sizeof(struct elf_prpsinfo)); 1508 1509 len = mm->arg_end - mm->arg_start; 1510 if (len >= ELF_PRARGSZ) 1511 len = ELF_PRARGSZ-1; 1512 if (copy_from_user(&psinfo->pr_psargs, 1513 (const char __user *)mm->arg_start, len)) 1514 return -EFAULT; 1515 for(i = 0; i < len; i++) 1516 if (psinfo->pr_psargs[i] == 0) 1517 psinfo->pr_psargs[i] = ' '; 1518 psinfo->pr_psargs[len] = 0; 1519 1520 rcu_read_lock(); 1521 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent)); 1522 rcu_read_unlock(); 1523 psinfo->pr_pid = task_pid_vnr(p); 1524 psinfo->pr_pgrp = task_pgrp_vnr(p); 1525 psinfo->pr_sid = task_session_vnr(p); 1526 1527 i = p->state ? ffz(~p->state) + 1 : 0; 1528 psinfo->pr_state = i; 1529 psinfo->pr_sname = (i > 5) ? '.' 
: "RSDTZW"[i]; 1530 psinfo->pr_zomb = psinfo->pr_sname == 'Z'; 1531 psinfo->pr_nice = task_nice(p); 1532 psinfo->pr_flag = p->flags; 1533 rcu_read_lock(); 1534 cred = __task_cred(p); 1535 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); 1536 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); 1537 rcu_read_unlock(); 1538 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); 1539 1540 return 0; 1541 } 1542 1543 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) 1544 { 1545 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv; 1546 int i = 0; 1547 do 1548 i += 2; 1549 while (auxv[i - 2] != AT_NULL); 1550 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); 1551 } 1552 1553 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, 1554 const kernel_siginfo_t *siginfo) 1555 { 1556 mm_segment_t old_fs = get_fs(); 1557 set_fs(KERNEL_DS); 1558 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo); 1559 set_fs(old_fs); 1560 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); 1561 } 1562 1563 #define MAX_FILE_NOTE_SIZE (4*1024*1024) 1564 /* 1565 * Format of NT_FILE note: 1566 * 1567 * long count -- how many files are mapped 1568 * long page_size -- units for file_ofs 1569 * array of [COUNT] elements of 1570 * long start 1571 * long end 1572 * long file_ofs 1573 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
1574 */ 1575 static int fill_files_note(struct memelfnote *note) 1576 { 1577 struct vm_area_struct *vma; 1578 unsigned count, size, names_ofs, remaining, n; 1579 user_long_t *data; 1580 user_long_t *start_end_ofs; 1581 char *name_base, *name_curpos; 1582 1583 /* *Estimated* file count and total data size needed */ 1584 count = current->mm->map_count; 1585 if (count > UINT_MAX / 64) 1586 return -EINVAL; 1587 size = count * 64; 1588 1589 names_ofs = (2 + 3 * count) * sizeof(data[0]); 1590 alloc: 1591 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ 1592 return -EINVAL; 1593 size = round_up(size, PAGE_SIZE); 1594 data = kvmalloc(size, GFP_KERNEL); 1595 if (ZERO_OR_NULL_PTR(data)) 1596 return -ENOMEM; 1597 1598 start_end_ofs = data + 2; 1599 name_base = name_curpos = ((char *)data) + names_ofs; 1600 remaining = size - names_ofs; 1601 count = 0; 1602 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { 1603 struct file *file; 1604 const char *filename; 1605 1606 file = vma->vm_file; 1607 if (!file) 1608 continue; 1609 filename = file_path(file, name_curpos, remaining); 1610 if (IS_ERR(filename)) { 1611 if (PTR_ERR(filename) == -ENAMETOOLONG) { 1612 kvfree(data); 1613 size = size * 5 / 4; 1614 goto alloc; 1615 } 1616 continue; 1617 } 1618 1619 /* file_path() fills at the end, move name down */ 1620 /* n = strlen(filename) + 1: */ 1621 n = (name_curpos + remaining) - filename; 1622 remaining = filename - name_curpos; 1623 memmove(name_curpos, filename, n); 1624 name_curpos += n; 1625 1626 *start_end_ofs++ = vma->vm_start; 1627 *start_end_ofs++ = vma->vm_end; 1628 *start_end_ofs++ = vma->vm_pgoff; 1629 count++; 1630 } 1631 1632 /* Now we know exact count of files, can store it */ 1633 data[0] = count; 1634 data[1] = PAGE_SIZE; 1635 /* 1636 * Count usually is less than current->mm->map_count, 1637 * we need to move filenames down. 
1638 */ 1639 n = current->mm->map_count - count; 1640 if (n != 0) { 1641 unsigned shift_bytes = n * 3 * sizeof(data[0]); 1642 memmove(name_base - shift_bytes, name_base, 1643 name_curpos - name_base); 1644 name_curpos -= shift_bytes; 1645 } 1646 1647 size = name_curpos - (char *)data; 1648 fill_note(note, "CORE", NT_FILE, size, data); 1649 return 0; 1650 } 1651 1652 #ifdef CORE_DUMP_USE_REGSET 1653 #include <linux/regset.h> 1654 1655 struct elf_thread_core_info { 1656 struct elf_thread_core_info *next; 1657 struct task_struct *task; 1658 struct elf_prstatus prstatus; 1659 struct memelfnote notes[0]; 1660 }; 1661 1662 struct elf_note_info { 1663 struct elf_thread_core_info *thread; 1664 struct memelfnote psinfo; 1665 struct memelfnote signote; 1666 struct memelfnote auxv; 1667 struct memelfnote files; 1668 user_siginfo_t csigdata; 1669 size_t size; 1670 int thread_notes; 1671 }; 1672 1673 /* 1674 * When a regset has a writeback hook, we call it on each thread before 1675 * dumping user memory. On register window machines, this makes sure the 1676 * user memory backing the register data is up to date before we read it. 1677 */ 1678 static void do_thread_regset_writeback(struct task_struct *task, 1679 const struct user_regset *regset) 1680 { 1681 if (regset->writeback) 1682 regset->writeback(task, regset, 1); 1683 } 1684 1685 #ifndef PRSTATUS_SIZE 1686 #define PRSTATUS_SIZE(S, R) sizeof(S) 1687 #endif 1688 1689 #ifndef SET_PR_FPVALID 1690 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V)) 1691 #endif 1692 1693 static int fill_thread_core_info(struct elf_thread_core_info *t, 1694 const struct user_regset_view *view, 1695 long signr, size_t *total) 1696 { 1697 unsigned int i; 1698 unsigned int regset0_size = regset_size(t->task, &view->regsets[0]); 1699 1700 /* 1701 * NT_PRSTATUS is the one special case, because the regset data 1702 * goes into the pr_reg field inside the note contents, rather 1703 * than being the whole note contents. We fill the reset in here. 
1704 * We assume that regset 0 is NT_PRSTATUS. 1705 */ 1706 fill_prstatus(&t->prstatus, t->task, signr); 1707 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size, 1708 &t->prstatus.pr_reg, NULL); 1709 1710 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, 1711 PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus); 1712 *total += notesize(&t->notes[0]); 1713 1714 do_thread_regset_writeback(t->task, &view->regsets[0]); 1715 1716 /* 1717 * Each other regset might generate a note too. For each regset 1718 * that has no core_note_type or is inactive, we leave t->notes[i] 1719 * all zero and we'll know to skip writing it later. 1720 */ 1721 for (i = 1; i < view->n; ++i) { 1722 const struct user_regset *regset = &view->regsets[i]; 1723 do_thread_regset_writeback(t->task, regset); 1724 if (regset->core_note_type && regset->get && 1725 (!regset->active || regset->active(t->task, regset) > 0)) { 1726 int ret; 1727 size_t size = regset_size(t->task, regset); 1728 void *data = kmalloc(size, GFP_KERNEL); 1729 if (unlikely(!data)) 1730 return 0; 1731 ret = regset->get(t->task, regset, 1732 0, size, data, NULL); 1733 if (unlikely(ret)) 1734 kfree(data); 1735 else { 1736 if (regset->core_note_type != NT_PRFPREG) 1737 fill_note(&t->notes[i], "LINUX", 1738 regset->core_note_type, 1739 size, data); 1740 else { 1741 SET_PR_FPVALID(&t->prstatus, 1742 1, regset0_size); 1743 fill_note(&t->notes[i], "CORE", 1744 NT_PRFPREG, size, data); 1745 } 1746 *total += notesize(&t->notes[i]); 1747 } 1748 } 1749 } 1750 1751 return 1; 1752 } 1753 1754 static int fill_note_info(struct elfhdr *elf, int phdrs, 1755 struct elf_note_info *info, 1756 const kernel_siginfo_t *siginfo, struct pt_regs *regs) 1757 { 1758 struct task_struct *dump_task = current; 1759 const struct user_regset_view *view = task_user_regset_view(dump_task); 1760 struct elf_thread_core_info *t; 1761 struct elf_prpsinfo *psinfo; 1762 struct core_thread *ct; 1763 unsigned int i; 1764 1765 info->size = 0; 1766 
info->thread = NULL; 1767 1768 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); 1769 if (psinfo == NULL) { 1770 info->psinfo.data = NULL; /* So we don't free this wrongly */ 1771 return 0; 1772 } 1773 1774 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); 1775 1776 /* 1777 * Figure out how many notes we're going to need for each thread. 1778 */ 1779 info->thread_notes = 0; 1780 for (i = 0; i < view->n; ++i) 1781 if (view->regsets[i].core_note_type != 0) 1782 ++info->thread_notes; 1783 1784 /* 1785 * Sanity check. We rely on regset 0 being in NT_PRSTATUS, 1786 * since it is our one special case. 1787 */ 1788 if (unlikely(info->thread_notes == 0) || 1789 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) { 1790 WARN_ON(1); 1791 return 0; 1792 } 1793 1794 /* 1795 * Initialize the ELF file header. 1796 */ 1797 fill_elf_header(elf, phdrs, 1798 view->e_machine, view->e_flags); 1799 1800 /* 1801 * Allocate a structure for each thread. 1802 */ 1803 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) { 1804 t = kzalloc(offsetof(struct elf_thread_core_info, 1805 notes[info->thread_notes]), 1806 GFP_KERNEL); 1807 if (unlikely(!t)) 1808 return 0; 1809 1810 t->task = ct->task; 1811 if (ct->task == dump_task || !info->thread) { 1812 t->next = info->thread; 1813 info->thread = t; 1814 } else { 1815 /* 1816 * Make sure to keep the original task at 1817 * the head of the list. 1818 */ 1819 t->next = info->thread->next; 1820 info->thread->next = t; 1821 } 1822 } 1823 1824 /* 1825 * Now fill in each thread's information. 1826 */ 1827 for (t = info->thread; t != NULL; t = t->next) 1828 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) 1829 return 0; 1830 1831 /* 1832 * Fill in the two process-wide notes. 
1833 */ 1834 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); 1835 info->size += notesize(&info->psinfo); 1836 1837 fill_siginfo_note(&info->signote, &info->csigdata, siginfo); 1838 info->size += notesize(&info->signote); 1839 1840 fill_auxv_note(&info->auxv, current->mm); 1841 info->size += notesize(&info->auxv); 1842 1843 if (fill_files_note(&info->files) == 0) 1844 info->size += notesize(&info->files); 1845 1846 return 1; 1847 } 1848 1849 static size_t get_note_info_size(struct elf_note_info *info) 1850 { 1851 return info->size; 1852 } 1853 1854 /* 1855 * Write all the notes for each thread. When writing the first thread, the 1856 * process-wide notes are interleaved after the first thread-specific note. 1857 */ 1858 static int write_note_info(struct elf_note_info *info, 1859 struct coredump_params *cprm) 1860 { 1861 bool first = true; 1862 struct elf_thread_core_info *t = info->thread; 1863 1864 do { 1865 int i; 1866 1867 if (!writenote(&t->notes[0], cprm)) 1868 return 0; 1869 1870 if (first && !writenote(&info->psinfo, cprm)) 1871 return 0; 1872 if (first && !writenote(&info->signote, cprm)) 1873 return 0; 1874 if (first && !writenote(&info->auxv, cprm)) 1875 return 0; 1876 if (first && info->files.data && 1877 !writenote(&info->files, cprm)) 1878 return 0; 1879 1880 for (i = 1; i < info->thread_notes; ++i) 1881 if (t->notes[i].data && 1882 !writenote(&t->notes[i], cprm)) 1883 return 0; 1884 1885 first = false; 1886 t = t->next; 1887 } while (t); 1888 1889 return 1; 1890 } 1891 1892 static void free_note_info(struct elf_note_info *info) 1893 { 1894 struct elf_thread_core_info *threads = info->thread; 1895 while (threads) { 1896 unsigned int i; 1897 struct elf_thread_core_info *t = threads; 1898 threads = t->next; 1899 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); 1900 for (i = 1; i < info->thread_notes; ++i) 1901 kfree(t->notes[i].data); 1902 kfree(t); 1903 } 1904 kfree(info->psinfo.data); 1905 kvfree(info->files.data); 1906 } 1907 
1908 #else 1909 1910 /* Here is the structure in which status of each thread is captured. */ 1911 struct elf_thread_status 1912 { 1913 struct list_head list; 1914 struct elf_prstatus prstatus; /* NT_PRSTATUS */ 1915 elf_fpregset_t fpu; /* NT_PRFPREG */ 1916 struct task_struct *thread; 1917 #ifdef ELF_CORE_COPY_XFPREGS 1918 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */ 1919 #endif 1920 struct memelfnote notes[3]; 1921 int num_notes; 1922 }; 1923 1924 /* 1925 * In order to add the specific thread information for the elf file format, 1926 * we need to keep a linked list of every threads pr_status and then create 1927 * a single section for them in the final core file. 1928 */ 1929 static int elf_dump_thread_status(long signr, struct elf_thread_status *t) 1930 { 1931 int sz = 0; 1932 struct task_struct *p = t->thread; 1933 t->num_notes = 0; 1934 1935 fill_prstatus(&t->prstatus, p, signr); 1936 elf_core_copy_task_regs(p, &t->prstatus.pr_reg); 1937 1938 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), 1939 &(t->prstatus)); 1940 t->num_notes++; 1941 sz += notesize(&t->notes[0]); 1942 1943 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, 1944 &t->fpu))) { 1945 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), 1946 &(t->fpu)); 1947 t->num_notes++; 1948 sz += notesize(&t->notes[1]); 1949 } 1950 1951 #ifdef ELF_CORE_COPY_XFPREGS 1952 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { 1953 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE, 1954 sizeof(t->xfpu), &t->xfpu); 1955 t->num_notes++; 1956 sz += notesize(&t->notes[2]); 1957 } 1958 #endif 1959 return sz; 1960 } 1961 1962 struct elf_note_info { 1963 struct memelfnote *notes; 1964 struct memelfnote *notes_files; 1965 struct elf_prstatus *prstatus; /* NT_PRSTATUS */ 1966 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ 1967 struct list_head thread_list; 1968 elf_fpregset_t *fpu; 1969 #ifdef ELF_CORE_COPY_XFPREGS 1970 elf_fpxregset_t *xfpu; 1971 #endif 1972 user_siginfo_t 
csigdata; 1973 int thread_status_size; 1974 int numnote; 1975 }; 1976 1977 static int elf_note_info_init(struct elf_note_info *info) 1978 { 1979 memset(info, 0, sizeof(*info)); 1980 INIT_LIST_HEAD(&info->thread_list); 1981 1982 /* Allocate space for ELF notes */ 1983 info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL); 1984 if (!info->notes) 1985 return 0; 1986 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); 1987 if (!info->psinfo) 1988 return 0; 1989 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); 1990 if (!info->prstatus) 1991 return 0; 1992 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); 1993 if (!info->fpu) 1994 return 0; 1995 #ifdef ELF_CORE_COPY_XFPREGS 1996 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); 1997 if (!info->xfpu) 1998 return 0; 1999 #endif 2000 return 1; 2001 } 2002 2003 static int fill_note_info(struct elfhdr *elf, int phdrs, 2004 struct elf_note_info *info, 2005 const kernel_siginfo_t *siginfo, struct pt_regs *regs) 2006 { 2007 struct core_thread *ct; 2008 struct elf_thread_status *ets; 2009 2010 if (!elf_note_info_init(info)) 2011 return 0; 2012 2013 for (ct = current->mm->core_state->dumper.next; 2014 ct; ct = ct->next) { 2015 ets = kzalloc(sizeof(*ets), GFP_KERNEL); 2016 if (!ets) 2017 return 0; 2018 2019 ets->thread = ct->task; 2020 list_add(&ets->list, &info->thread_list); 2021 } 2022 2023 list_for_each_entry(ets, &info->thread_list, list) { 2024 int sz; 2025 2026 sz = elf_dump_thread_status(siginfo->si_signo, ets); 2027 info->thread_status_size += sz; 2028 } 2029 /* now collect the dump for the current */ 2030 memset(info->prstatus, 0, sizeof(*info->prstatus)); 2031 fill_prstatus(info->prstatus, current, siginfo->si_signo); 2032 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 2033 2034 /* Set up header */ 2035 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); 2036 2037 /* 2038 * Set up the notes in similar form to SVR4 core dumps made 2039 * with info from their /proc. 
2040 */ 2041 2042 fill_note(info->notes + 0, "CORE", NT_PRSTATUS, 2043 sizeof(*info->prstatus), info->prstatus); 2044 fill_psinfo(info->psinfo, current->group_leader, current->mm); 2045 fill_note(info->notes + 1, "CORE", NT_PRPSINFO, 2046 sizeof(*info->psinfo), info->psinfo); 2047 2048 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); 2049 fill_auxv_note(info->notes + 3, current->mm); 2050 info->numnote = 4; 2051 2052 if (fill_files_note(info->notes + info->numnote) == 0) { 2053 info->notes_files = info->notes + info->numnote; 2054 info->numnote++; 2055 } 2056 2057 /* Try to dump the FPU. */ 2058 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, 2059 info->fpu); 2060 if (info->prstatus->pr_fpvalid) 2061 fill_note(info->notes + info->numnote++, 2062 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); 2063 #ifdef ELF_CORE_COPY_XFPREGS 2064 if (elf_core_copy_task_xfpregs(current, info->xfpu)) 2065 fill_note(info->notes + info->numnote++, 2066 "LINUX", ELF_CORE_XFPREG_TYPE, 2067 sizeof(*info->xfpu), info->xfpu); 2068 #endif 2069 2070 return 1; 2071 } 2072 2073 static size_t get_note_info_size(struct elf_note_info *info) 2074 { 2075 int sz = 0; 2076 int i; 2077 2078 for (i = 0; i < info->numnote; i++) 2079 sz += notesize(info->notes + i); 2080 2081 sz += info->thread_status_size; 2082 2083 return sz; 2084 } 2085 2086 static int write_note_info(struct elf_note_info *info, 2087 struct coredump_params *cprm) 2088 { 2089 struct elf_thread_status *ets; 2090 int i; 2091 2092 for (i = 0; i < info->numnote; i++) 2093 if (!writenote(info->notes + i, cprm)) 2094 return 0; 2095 2096 /* write out the thread status notes section */ 2097 list_for_each_entry(ets, &info->thread_list, list) { 2098 for (i = 0; i < ets->num_notes; i++) 2099 if (!writenote(&ets->notes[i], cprm)) 2100 return 0; 2101 } 2102 2103 return 1; 2104 } 2105 2106 static void free_note_info(struct elf_note_info *info) 2107 { 2108 while (!list_empty(&info->thread_list)) { 2109 struct 
list_head *tmp = info->thread_list.next; 2110 list_del(tmp); 2111 kfree(list_entry(tmp, struct elf_thread_status, list)); 2112 } 2113 2114 /* Free data possibly allocated by fill_files_note(): */ 2115 if (info->notes_files) 2116 kvfree(info->notes_files->data); 2117 2118 kfree(info->prstatus); 2119 kfree(info->psinfo); 2120 kfree(info->notes); 2121 kfree(info->fpu); 2122 #ifdef ELF_CORE_COPY_XFPREGS 2123 kfree(info->xfpu); 2124 #endif 2125 } 2126 2127 #endif 2128 2129 static struct vm_area_struct *first_vma(struct task_struct *tsk, 2130 struct vm_area_struct *gate_vma) 2131 { 2132 struct vm_area_struct *ret = tsk->mm->mmap; 2133 2134 if (ret) 2135 return ret; 2136 return gate_vma; 2137 } 2138 /* 2139 * Helper function for iterating across a vma list. It ensures that the caller 2140 * will visit `gate_vma' prior to terminating the search. 2141 */ 2142 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, 2143 struct vm_area_struct *gate_vma) 2144 { 2145 struct vm_area_struct *ret; 2146 2147 ret = this_vma->vm_next; 2148 if (ret) 2149 return ret; 2150 if (this_vma == gate_vma) 2151 return NULL; 2152 return gate_vma; 2153 } 2154 2155 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, 2156 elf_addr_t e_shoff, int segs) 2157 { 2158 elf->e_shoff = e_shoff; 2159 elf->e_shentsize = sizeof(*shdr4extnum); 2160 elf->e_shnum = 1; 2161 elf->e_shstrndx = SHN_UNDEF; 2162 2163 memset(shdr4extnum, 0, sizeof(*shdr4extnum)); 2164 2165 shdr4extnum->sh_type = SHT_NULL; 2166 shdr4extnum->sh_size = elf->e_shnum; 2167 shdr4extnum->sh_link = elf->e_shstrndx; 2168 shdr4extnum->sh_info = segs; 2169 } 2170 2171 /* 2172 * Actual dumper 2173 * 2174 * This is a two-pass process; first we find the offsets of the bits, 2175 * and then they are actually written out. If we run out of core limit 2176 * we just truncate. 
2177 */ 2178 static int elf_core_dump(struct coredump_params *cprm) 2179 { 2180 int has_dumped = 0; 2181 mm_segment_t fs; 2182 int segs, i; 2183 size_t vma_data_size = 0; 2184 struct vm_area_struct *vma, *gate_vma; 2185 struct elfhdr *elf = NULL; 2186 loff_t offset = 0, dataoff; 2187 struct elf_note_info info = { }; 2188 struct elf_phdr *phdr4note = NULL; 2189 struct elf_shdr *shdr4extnum = NULL; 2190 Elf_Half e_phnum; 2191 elf_addr_t e_shoff; 2192 elf_addr_t *vma_filesz = NULL; 2193 2194 /* 2195 * We no longer stop all VM operations. 2196 * 2197 * This is because those proceses that could possibly change map_count 2198 * or the mmap / vma pages are now blocked in do_exit on current 2199 * finishing this core dump. 2200 * 2201 * Only ptrace can touch these memory addresses, but it doesn't change 2202 * the map_count or the pages allocated. So no possibility of crashing 2203 * exists while dumping the mm->vm_next areas to the core file. 2204 */ 2205 2206 /* alloc memory for large data structures: too large to be on stack */ 2207 elf = kmalloc(sizeof(*elf), GFP_KERNEL); 2208 if (!elf) 2209 goto out; 2210 /* 2211 * The number of segs are recored into ELF header as 16bit value. 2212 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. 2213 */ 2214 segs = current->mm->map_count; 2215 segs += elf_core_extra_phdrs(); 2216 2217 gate_vma = get_gate_vma(current->mm); 2218 if (gate_vma != NULL) 2219 segs++; 2220 2221 /* for notes section */ 2222 segs++; 2223 2224 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid 2225 * this, kernel supports extended numbering. Have a look at 2226 * include/linux/elf.h for further information. */ 2227 e_phnum = segs > PN_XNUM ? PN_XNUM : segs; 2228 2229 /* 2230 * Collect all the non-memory information about the process for the 2231 * notes. This also sets up the file header. 
2232 */ 2233 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs)) 2234 goto cleanup; 2235 2236 has_dumped = 1; 2237 2238 fs = get_fs(); 2239 set_fs(KERNEL_DS); 2240 2241 offset += sizeof(*elf); /* Elf header */ 2242 offset += segs * sizeof(struct elf_phdr); /* Program headers */ 2243 2244 /* Write notes phdr entry */ 2245 { 2246 size_t sz = get_note_info_size(&info); 2247 2248 sz += elf_coredump_extra_notes_size(); 2249 2250 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); 2251 if (!phdr4note) 2252 goto end_coredump; 2253 2254 fill_elf_note_phdr(phdr4note, sz, offset); 2255 offset += sz; 2256 } 2257 2258 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); 2259 2260 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz)) 2261 goto end_coredump; 2262 vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)), 2263 GFP_KERNEL); 2264 if (ZERO_OR_NULL_PTR(vma_filesz)) 2265 goto end_coredump; 2266 2267 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; 2268 vma = next_vma(vma, gate_vma)) { 2269 unsigned long dump_size; 2270 2271 dump_size = vma_dump_size(vma, cprm->mm_flags); 2272 vma_filesz[i++] = dump_size; 2273 vma_data_size += dump_size; 2274 } 2275 2276 offset += vma_data_size; 2277 offset += elf_core_extra_data_size(); 2278 e_shoff = offset; 2279 2280 if (e_phnum == PN_XNUM) { 2281 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); 2282 if (!shdr4extnum) 2283 goto end_coredump; 2284 fill_extnum_info(elf, shdr4extnum, e_shoff, segs); 2285 } 2286 2287 offset = dataoff; 2288 2289 if (!dump_emit(cprm, elf, sizeof(*elf))) 2290 goto end_coredump; 2291 2292 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) 2293 goto end_coredump; 2294 2295 /* Write program headers for segments dump */ 2296 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; 2297 vma = next_vma(vma, gate_vma)) { 2298 struct elf_phdr phdr; 2299 2300 phdr.p_type = PT_LOAD; 2301 phdr.p_offset = offset; 2302 phdr.p_vaddr = vma->vm_start; 2303 phdr.p_paddr = 0; 
2304 phdr.p_filesz = vma_filesz[i++]; 2305 phdr.p_memsz = vma->vm_end - vma->vm_start; 2306 offset += phdr.p_filesz; 2307 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; 2308 if (vma->vm_flags & VM_WRITE) 2309 phdr.p_flags |= PF_W; 2310 if (vma->vm_flags & VM_EXEC) 2311 phdr.p_flags |= PF_X; 2312 phdr.p_align = ELF_EXEC_PAGESIZE; 2313 2314 if (!dump_emit(cprm, &phdr, sizeof(phdr))) 2315 goto end_coredump; 2316 } 2317 2318 if (!elf_core_write_extra_phdrs(cprm, offset)) 2319 goto end_coredump; 2320 2321 /* write out the notes section */ 2322 if (!write_note_info(&info, cprm)) 2323 goto end_coredump; 2324 2325 if (elf_coredump_extra_notes_write(cprm)) 2326 goto end_coredump; 2327 2328 /* Align to page */ 2329 if (!dump_skip(cprm, dataoff - cprm->pos)) 2330 goto end_coredump; 2331 2332 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; 2333 vma = next_vma(vma, gate_vma)) { 2334 unsigned long addr; 2335 unsigned long end; 2336 2337 end = vma->vm_start + vma_filesz[i++]; 2338 2339 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 2340 struct page *page; 2341 int stop; 2342 2343 page = get_dump_page(addr); 2344 if (page) { 2345 void *kaddr = kmap(page); 2346 stop = !dump_emit(cprm, kaddr, PAGE_SIZE); 2347 kunmap(page); 2348 put_page(page); 2349 } else 2350 stop = !dump_skip(cprm, PAGE_SIZE); 2351 if (stop) 2352 goto end_coredump; 2353 } 2354 } 2355 dump_truncate(cprm); 2356 2357 if (!elf_core_write_extra_data(cprm)) 2358 goto end_coredump; 2359 2360 if (e_phnum == PN_XNUM) { 2361 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) 2362 goto end_coredump; 2363 } 2364 2365 end_coredump: 2366 set_fs(fs); 2367 2368 cleanup: 2369 free_note_info(&info); 2370 kfree(shdr4extnum); 2371 kvfree(vma_filesz); 2372 kfree(phdr4note); 2373 kfree(elf); 2374 out: 2375 return has_dumped; 2376 } 2377 2378 #endif /* CONFIG_ELF_CORE */ 2379 2380 static int __init init_elf_binfmt(void) 2381 { 2382 register_binfmt(&elf_format); 2383 return 0; 2384 } 2385 2386 static 
void __exit exit_elf_binfmt(void) 2387 { 2388 /* Remove the COFF and ELF loaders. */ 2389 unregister_binfmt(&elf_format); 2390 } 2391 2392 core_initcall(init_elf_binfmt); 2393 module_exit(exit_elf_binfmt); 2394 MODULE_LICENSE("GPL"); 2395