1 /* This is the Linux kernel elf-loading code, ported into user space */ 2 #include "qemu/osdep.h" 3 #include <sys/param.h> 4 5 #include <sys/prctl.h> 6 #include <sys/resource.h> 7 #include <sys/shm.h> 8 9 #include "qemu.h" 10 #include "user/tswap-target.h" 11 #include "user/page-protection.h" 12 #include "exec/page-protection.h" 13 #include "exec/mmap-lock.h" 14 #include "exec/translation-block.h" 15 #include "exec/tswap.h" 16 #include "user/guest-base.h" 17 #include "user-internals.h" 18 #include "signal-common.h" 19 #include "loader.h" 20 #include "user-mmap.h" 21 #include "disas/disas.h" 22 #include "qemu/bitops.h" 23 #include "qemu/path.h" 24 #include "qemu/queue.h" 25 #include "qemu/guest-random.h" 26 #include "qemu/units.h" 27 #include "qemu/selfmap.h" 28 #include "qemu/lockable.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "target_elf.h" 32 #include "target_signal.h" 33 #include "tcg/debuginfo.h" 34 35 #ifdef TARGET_ARM 36 #include "target/arm/cpu-features.h" 37 #endif 38 39 #ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE 40 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0 41 #endif 42 43 typedef struct { 44 const uint8_t *image; 45 const uint32_t *relocs; 46 unsigned image_size; 47 unsigned reloc_count; 48 unsigned sigreturn_ofs; 49 unsigned rt_sigreturn_ofs; 50 } VdsoImageInfo; 51 52 #define ELF_OSABI ELFOSABI_SYSV 53 54 /* from personality.h */ 55 56 /* 57 * Flags for bug emulation. 58 * 59 * These occupy the top three bytes. 60 */ 61 enum { 62 ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */ 63 FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to 64 descriptors (signal handling) */ 65 MMAP_PAGE_ZERO = 0x0100000, 66 ADDR_COMPAT_LAYOUT = 0x0200000, 67 READ_IMPLIES_EXEC = 0x0400000, 68 ADDR_LIMIT_32BIT = 0x0800000, 69 SHORT_INODE = 0x1000000, 70 WHOLE_SECONDS = 0x2000000, 71 STICKY_TIMEOUTS = 0x4000000, 72 ADDR_LIMIT_3GB = 0x8000000, 73 }; 74 75 /* 76 * Personality types. 77 * 78 * These go in the low byte. Avoid using the top bit, it will 79 * conflict with error returns. 80 */ 81 enum { 82 PER_LINUX = 0x0000, 83 PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT, 84 PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS, 85 PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, 86 PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE, 87 PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE, 88 PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS, 89 PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE, 90 PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS, 91 PER_BSD = 0x0006, 92 PER_SUNOS = 0x0006 | STICKY_TIMEOUTS, 93 PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE, 94 PER_LINUX32 = 0x0008, 95 PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB, 96 PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */ 97 PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */ 98 PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */ 99 PER_RISCOS = 0x000c, 100 PER_SOLARIS = 0x000d | STICKY_TIMEOUTS, 101 PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, 102 PER_OSF4 = 0x000f, /* OSF/1 v4 */ 103 PER_HPUX = 0x0010, 104 PER_MASK = 0x00ff, 105 }; 106 107 /* 108 * Return the base personality without flags. 109 */ 110 #define personality(pers) (pers & PER_MASK) 111 112 int info_is_fdpic(struct image_info *info) 113 { 114 return info->personality == PER_LINUX_FDPIC; 115 } 116 117 /* this flag is uneffective under linux too, should be deleted */ 118 #ifndef MAP_DENYWRITE 119 #define MAP_DENYWRITE 0 120 #endif 121 122 /* should probably go in elf.h */ 123 #ifndef ELIBBAD 124 #define ELIBBAD 80 125 #endif 126 127 #if TARGET_BIG_ENDIAN 128 #define ELF_DATA ELFDATA2MSB 129 #else 130 #define ELF_DATA ELFDATA2LSB 131 #endif 132 133 #ifdef TARGET_ABI_MIPSN32 134 typedef abi_ullong target_elf_greg_t; 135 #define tswapreg(ptr) tswap64(ptr) 136 #else 137 typedef abi_ulong target_elf_greg_t; 138 #define tswapreg(ptr) tswapal(ptr) 139 #endif 140 141 #ifdef USE_UID16 142 typedef abi_ushort target_uid_t; 143 typedef abi_ushort target_gid_t; 144 #else 145 typedef abi_uint target_uid_t; 146 typedef abi_uint target_gid_t; 147 #endif 148 typedef abi_int target_pid_t; 149 150 #ifdef TARGET_I386 151 152 #define HAVE_INIT_MAIN_THREAD 153 154 #ifdef TARGET_X86_64 155 #define ELF_CLASS ELFCLASS64 156 #define ELF_ARCH EM_X86_64 157 158 #define ELF_NREG 27 159 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 160 161 /* 162 * Note that ELF_NREG should be 29 as there should be place for 163 * TRAPNO and ERR "registers" as well but linux doesn't dump 164 * those. 165 * 166 * See linux kernel: arch/x86/include/asm/elf.h 167 */ 168 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) 169 { 170 (*regs)[0] = tswapreg(env->regs[15]); 171 (*regs)[1] = tswapreg(env->regs[14]); 172 (*regs)[2] = tswapreg(env->regs[13]); 173 (*regs)[3] = tswapreg(env->regs[12]); 174 (*regs)[4] = tswapreg(env->regs[R_EBP]); 175 (*regs)[5] = tswapreg(env->regs[R_EBX]); 176 (*regs)[6] = tswapreg(env->regs[11]); 177 (*regs)[7] = tswapreg(env->regs[10]); 178 (*regs)[8] = tswapreg(env->regs[9]); 179 (*regs)[9] = tswapreg(env->regs[8]); 180 (*regs)[10] = tswapreg(env->regs[R_EAX]); 181 (*regs)[11] = tswapreg(env->regs[R_ECX]); 182 (*regs)[12] = tswapreg(env->regs[R_EDX]); 183 (*regs)[13] = tswapreg(env->regs[R_ESI]); 184 (*regs)[14] = tswapreg(env->regs[R_EDI]); 185 (*regs)[15] = tswapreg(get_task_state(env_cpu_const(env))->orig_ax); 186 (*regs)[16] = tswapreg(env->eip); 187 (*regs)[17] = tswapreg(env->segs[R_CS].selector & 0xffff); 188 (*regs)[18] = tswapreg(env->eflags); 189 (*regs)[19] = tswapreg(env->regs[R_ESP]); 190 (*regs)[20] = tswapreg(env->segs[R_SS].selector & 0xffff); 191 (*regs)[21] = tswapreg(env->segs[R_FS].selector & 0xffff); 192 (*regs)[22] = tswapreg(env->segs[R_GS].selector & 0xffff); 193 (*regs)[23] = tswapreg(env->segs[R_DS].selector & 0xffff); 194 (*regs)[24] = tswapreg(env->segs[R_ES].selector & 0xffff); 195 (*regs)[25] = tswapreg(env->segs[R_FS].selector & 0xffff); 196 (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff); 197 } 198 199 #if ULONG_MAX > UINT32_MAX 200 #define INIT_GUEST_COMMPAGE 201 static bool init_guest_commpage(void) 202 { 203 /* 204 * The vsyscall page is at a high negative address aka kernel space, 205 * which means that we cannot actually allocate it with target_mmap. 206 * We still should be able to use page_set_flags, unless the user 207 * has specified -R reserved_va, which would trigger an assert(). 208 */ 209 if (reserved_va != 0 && 210 TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE - 1 > reserved_va) { 211 error_report("Cannot allocate vsyscall page"); 212 exit(EXIT_FAILURE); 213 } 214 page_set_flags(TARGET_VSYSCALL_PAGE, 215 TARGET_VSYSCALL_PAGE | ~TARGET_PAGE_MASK, 216 PAGE_EXEC | PAGE_VALID); 217 return true; 218 } 219 #endif 220 #else 221 222 /* 223 * This is used to ensure we don't load something for the wrong architecture. 224 */ 225 #define elf_check_arch(x) ( ((x) == EM_386) || ((x) == EM_486) ) 226 227 /* 228 * These are used to set parameters in the core dumps. 229 */ 230 #define ELF_CLASS ELFCLASS32 231 #define ELF_ARCH EM_386 232 233 #define EXSTACK_DEFAULT true 234 235 #define ELF_NREG 17 236 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 237 238 /* 239 * Note that ELF_NREG should be 19 as there should be place for 240 * TRAPNO and ERR "registers" as well but linux doesn't dump 241 * those. 242 * 243 * See linux kernel: arch/x86/include/asm/elf.h 244 */ 245 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) 246 { 247 (*regs)[0] = tswapreg(env->regs[R_EBX]); 248 (*regs)[1] = tswapreg(env->regs[R_ECX]); 249 (*regs)[2] = tswapreg(env->regs[R_EDX]); 250 (*regs)[3] = tswapreg(env->regs[R_ESI]); 251 (*regs)[4] = tswapreg(env->regs[R_EDI]); 252 (*regs)[5] = tswapreg(env->regs[R_EBP]); 253 (*regs)[6] = tswapreg(env->regs[R_EAX]); 254 (*regs)[7] = tswapreg(env->segs[R_DS].selector & 0xffff); 255 (*regs)[8] = tswapreg(env->segs[R_ES].selector & 0xffff); 256 (*regs)[9] = tswapreg(env->segs[R_FS].selector & 0xffff); 257 (*regs)[10] = tswapreg(env->segs[R_GS].selector & 0xffff); 258 (*regs)[11] = tswapreg(get_task_state(env_cpu_const(env))->orig_ax); 259 (*regs)[12] = tswapreg(env->eip); 260 (*regs)[13] = tswapreg(env->segs[R_CS].selector & 0xffff); 261 (*regs)[14] = tswapreg(env->eflags); 262 (*regs)[15] = tswapreg(env->regs[R_ESP]); 263 (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff); 264 } 265 266 /* 267 * i386 is the only target which supplies AT_SYSINFO for the vdso. 268 * All others only supply AT_SYSINFO_EHDR. 269 */ 270 #define DLINFO_ARCH_ITEMS (vdso_info != NULL) 271 #define ARCH_DLINFO \ 272 do { \ 273 if (vdso_info) { \ 274 NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry); \ 275 } \ 276 } while (0) 277 278 #endif /* TARGET_X86_64 */ 279 280 #define VDSO_HEADER "vdso.c.inc" 281 282 #define USE_ELF_CORE_DUMP 283 #define ELF_EXEC_PAGESIZE 4096 284 285 #endif /* TARGET_I386 */ 286 287 #ifdef TARGET_ARM 288 289 #ifndef TARGET_AARCH64 290 /* 32 bit ARM definitions */ 291 292 #define ELF_ARCH EM_ARM 293 #define ELF_CLASS ELFCLASS32 294 #define EXSTACK_DEFAULT true 295 296 #define HAVE_INIT_MAIN_THREAD 297 298 #define ELF_NREG 18 299 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 300 301 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUARMState *env) 302 { 303 (*regs)[0] = tswapreg(env->regs[0]); 304 (*regs)[1] = tswapreg(env->regs[1]); 305 (*regs)[2] = tswapreg(env->regs[2]); 306 (*regs)[3] = tswapreg(env->regs[3]); 307 (*regs)[4] = tswapreg(env->regs[4]); 308 (*regs)[5] = tswapreg(env->regs[5]); 309 (*regs)[6] = tswapreg(env->regs[6]); 310 (*regs)[7] = tswapreg(env->regs[7]); 311 (*regs)[8] = tswapreg(env->regs[8]); 312 (*regs)[9] = tswapreg(env->regs[9]); 313 (*regs)[10] = tswapreg(env->regs[10]); 314 (*regs)[11] = tswapreg(env->regs[11]); 315 (*regs)[12] = tswapreg(env->regs[12]); 316 (*regs)[13] = tswapreg(env->regs[13]); 317 (*regs)[14] = tswapreg(env->regs[14]); 318 (*regs)[15] = tswapreg(env->regs[15]); 319 320 (*regs)[16] = tswapreg(cpsr_read((CPUARMState *)env)); 321 (*regs)[17] = tswapreg(env->regs[0]); /* XXX */ 322 } 323 324 #define USE_ELF_CORE_DUMP 325 #define ELF_EXEC_PAGESIZE 4096 326 327 /* The commpage only exists for 32 bit kernels */ 328 329 #define HI_COMMPAGE (intptr_t)0xffff0f00u 330 331 static bool init_guest_commpage(void) 332 { 333 ARMCPU *cpu = ARM_CPU(thread_cpu); 334 int host_page_size = qemu_real_host_page_size(); 335 abi_ptr commpage; 336 void *want; 337 void *addr; 338 339 /* 340 * M-profile allocates maximum of 2GB address space, so can never 341 * allocate the commpage. Skip it. 342 */ 343 if (arm_feature(&cpu->env, ARM_FEATURE_M)) { 344 return true; 345 } 346 347 commpage = HI_COMMPAGE & -host_page_size; 348 want = g2h_untagged(commpage); 349 addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, 350 MAP_ANONYMOUS | MAP_PRIVATE | 351 (commpage < reserved_va ? MAP_FIXED : MAP_FIXED_NOREPLACE), 352 -1, 0); 353 354 if (addr == MAP_FAILED) { 355 perror("Allocating guest commpage"); 356 exit(EXIT_FAILURE); 357 } 358 if (addr != want) { 359 return false; 360 } 361 362 /* Set kernel helper versions; rest of page is 0. */ 363 __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu)); 364 365 if (mprotect(addr, host_page_size, PROT_READ)) { 366 perror("Protecting guest commpage"); 367 exit(EXIT_FAILURE); 368 } 369 370 page_set_flags(commpage, commpage | (host_page_size - 1), 371 PAGE_READ | PAGE_EXEC | PAGE_VALID); 372 return true; 373 } 374 375 #if TARGET_BIG_ENDIAN 376 #include "elf.h" 377 #include "vdso-be8.c.inc" 378 #include "vdso-be32.c.inc" 379 380 static const VdsoImageInfo *vdso_image_info(uint32_t elf_flags) 381 { 382 return (EF_ARM_EABI_VERSION(elf_flags) >= EF_ARM_EABI_VER4 383 && (elf_flags & EF_ARM_BE8) 384 ? &vdso_be8_image_info 385 : &vdso_be32_image_info); 386 } 387 #define vdso_image_info vdso_image_info 388 #else 389 # define VDSO_HEADER "vdso-le.c.inc" 390 #endif 391 392 #else 393 /* 64 bit ARM definitions */ 394 395 #define ELF_ARCH EM_AARCH64 396 #define ELF_CLASS ELFCLASS64 397 398 #define HAVE_INIT_MAIN_THREAD 399 400 #define ELF_NREG 34 401 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 402 403 static void elf_core_copy_regs(target_elf_gregset_t *regs, 404 const CPUARMState *env) 405 { 406 int i; 407 408 for (i = 0; i < 32; i++) { 409 (*regs)[i] = tswapreg(env->xregs[i]); 410 } 411 (*regs)[32] = tswapreg(env->pc); 412 (*regs)[33] = tswapreg(pstate_read((CPUARMState *)env)); 413 } 414 415 #define USE_ELF_CORE_DUMP 416 #define ELF_EXEC_PAGESIZE 4096 417 418 #if TARGET_BIG_ENDIAN 419 # define VDSO_HEADER "vdso-be.c.inc" 420 #else 421 # define VDSO_HEADER "vdso-le.c.inc" 422 #endif 423 424 #endif /* not TARGET_AARCH64 */ 425 426 #endif /* TARGET_ARM */ 427 428 #ifdef TARGET_SPARC 429 430 #ifndef TARGET_SPARC64 431 # define ELF_CLASS ELFCLASS32 432 # define ELF_ARCH EM_SPARC 433 #elif defined(TARGET_ABI32) 434 # define ELF_CLASS ELFCLASS32 435 # define elf_check_arch(x) ((x) == EM_SPARC32PLUS || (x) == EM_SPARC) 436 #else 437 # define ELF_CLASS ELFCLASS64 438 # define ELF_ARCH EM_SPARCV9 439 #endif 440 441 #define HAVE_INIT_MAIN_THREAD 442 443 #endif /* TARGET_SPARC */ 444 445 #ifdef TARGET_PPC 446 447 #define ELF_MACHINE PPC_ELF_MACHINE 448 449 #if defined(TARGET_PPC64) 450 451 #define elf_check_arch(x) ( (x) == EM_PPC64 ) 452 453 #define ELF_CLASS ELFCLASS64 454 455 #else 456 457 #define ELF_CLASS ELFCLASS32 458 #define EXSTACK_DEFAULT true 459 460 #endif 461 462 #define ELF_ARCH EM_PPC 463 464 /* 465 * The requirements here are: 466 * - keep the final alignment of sp (sp & 0xf) 467 * - make sure the 32-bit value at the first 16 byte aligned position of 468 * AUXV is greater than 16 for glibc compatibility. 469 * AT_IGNOREPPC is used for that. 470 * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, 471 * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. 472 */ 473 #define DLINFO_ARCH_ITEMS 5 474 #define ARCH_DLINFO \ 475 do { \ 476 PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \ 477 /* \ 478 * Handle glibc compatibility: these magic entries must \ 479 * be at the lowest addresses in the final auxv. \ 480 */ \ 481 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ 482 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ 483 NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \ 484 NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \ 485 NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ 486 } while (0) 487 488 #define HAVE_INIT_MAIN_THREAD 489 490 /* See linux kernel: arch/powerpc/include/asm/elf.h. */ 491 #define ELF_NREG 48 492 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 493 494 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *env) 495 { 496 int i; 497 target_ulong ccr = 0; 498 499 for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { 500 (*regs)[i] = tswapreg(env->gpr[i]); 501 } 502 503 (*regs)[32] = tswapreg(env->nip); 504 (*regs)[33] = tswapreg(env->msr); 505 (*regs)[35] = tswapreg(env->ctr); 506 (*regs)[36] = tswapreg(env->lr); 507 (*regs)[37] = tswapreg(cpu_read_xer(env)); 508 509 ccr = ppc_get_cr(env); 510 (*regs)[38] = tswapreg(ccr); 511 } 512 513 #define USE_ELF_CORE_DUMP 514 #define ELF_EXEC_PAGESIZE 4096 515 516 #ifndef TARGET_PPC64 517 # define VDSO_HEADER "vdso-32.c.inc" 518 #elif TARGET_BIG_ENDIAN 519 # define VDSO_HEADER "vdso-64.c.inc" 520 #else 521 # define VDSO_HEADER "vdso-64le.c.inc" 522 #endif 523 524 #endif 525 526 #ifdef TARGET_LOONGARCH64 527 528 #define ELF_CLASS ELFCLASS64 529 #define ELF_ARCH EM_LOONGARCH 530 #define EXSTACK_DEFAULT true 531 532 #define elf_check_arch(x) ((x) == EM_LOONGARCH) 533 534 #define VDSO_HEADER "vdso.c.inc" 535 536 #define HAVE_INIT_MAIN_THREAD 537 538 /* See linux kernel: arch/loongarch/include/asm/elf.h */ 539 #define ELF_NREG 45 540 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 541 542 enum { 543 TARGET_EF_R0 = 0, 544 TARGET_EF_CSR_ERA = TARGET_EF_R0 + 33, 545 TARGET_EF_CSR_BADV = TARGET_EF_R0 + 34, 546 }; 547 548 static void elf_core_copy_regs(target_elf_gregset_t *regs, 549 const CPULoongArchState *env) 550 { 551 int i; 552 553 (*regs)[TARGET_EF_R0] = 0; 554 555 for (i = 1; i < ARRAY_SIZE(env->gpr); i++) { 556 (*regs)[TARGET_EF_R0 + i] = tswapreg(env->gpr[i]); 557 } 558 559 (*regs)[TARGET_EF_CSR_ERA] = tswapreg(env->pc); 560 (*regs)[TARGET_EF_CSR_BADV] = tswapreg(env->CSR_BADV); 561 } 562 563 #define USE_ELF_CORE_DUMP 564 #define ELF_EXEC_PAGESIZE 4096 565 566 #endif /* TARGET_LOONGARCH64 */ 567 568 #ifdef TARGET_MIPS 569 570 #ifdef TARGET_MIPS64 571 #define ELF_CLASS ELFCLASS64 572 #else 573 #define ELF_CLASS ELFCLASS32 574 #endif 575 #define ELF_ARCH EM_MIPS 576 #define EXSTACK_DEFAULT true 577 578 #ifdef TARGET_ABI_MIPSN32 579 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2) 580 #else 581 #define elf_check_abi(x) (!((x) & EF_MIPS_ABI2)) 582 #endif 583 584 #define HAVE_INIT_MAIN_THREAD 585 586 /* See linux kernel: arch/mips/include/asm/elf.h. */ 587 #define ELF_NREG 45 588 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 589 590 /* See linux kernel: arch/mips/include/asm/reg.h. */ 591 enum { 592 #ifdef TARGET_MIPS64 593 TARGET_EF_R0 = 0, 594 #else 595 TARGET_EF_R0 = 6, 596 #endif 597 TARGET_EF_R26 = TARGET_EF_R0 + 26, 598 TARGET_EF_R27 = TARGET_EF_R0 + 27, 599 TARGET_EF_LO = TARGET_EF_R0 + 32, 600 TARGET_EF_HI = TARGET_EF_R0 + 33, 601 TARGET_EF_CP0_EPC = TARGET_EF_R0 + 34, 602 TARGET_EF_CP0_BADVADDR = TARGET_EF_R0 + 35, 603 TARGET_EF_CP0_STATUS = TARGET_EF_R0 + 36, 604 TARGET_EF_CP0_CAUSE = TARGET_EF_R0 + 37 605 }; 606 607 /* See linux kernel: arch/mips/kernel/process.c:elf_dump_regs. */ 608 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMIPSState *env) 609 { 610 int i; 611 612 for (i = 0; i < TARGET_EF_R0; i++) { 613 (*regs)[i] = 0; 614 } 615 (*regs)[TARGET_EF_R0] = 0; 616 617 for (i = 1; i < ARRAY_SIZE(env->active_tc.gpr); i++) { 618 (*regs)[TARGET_EF_R0 + i] = tswapreg(env->active_tc.gpr[i]); 619 } 620 621 (*regs)[TARGET_EF_R26] = 0; 622 (*regs)[TARGET_EF_R27] = 0; 623 (*regs)[TARGET_EF_LO] = tswapreg(env->active_tc.LO[0]); 624 (*regs)[TARGET_EF_HI] = tswapreg(env->active_tc.HI[0]); 625 (*regs)[TARGET_EF_CP0_EPC] = tswapreg(env->active_tc.PC); 626 (*regs)[TARGET_EF_CP0_BADVADDR] = tswapreg(env->CP0_BadVAddr); 627 (*regs)[TARGET_EF_CP0_STATUS] = tswapreg(env->CP0_Status); 628 (*regs)[TARGET_EF_CP0_CAUSE] = tswapreg(env->CP0_Cause); 629 } 630 631 #define USE_ELF_CORE_DUMP 632 #define ELF_EXEC_PAGESIZE 4096 633 634 #endif /* TARGET_MIPS */ 635 636 #ifdef TARGET_MICROBLAZE 637 638 #define elf_check_arch(x) ( (x) == EM_MICROBLAZE || (x) == EM_MICROBLAZE_OLD) 639 640 #define ELF_CLASS ELFCLASS32 641 #define ELF_ARCH EM_MICROBLAZE 642 643 #define HAVE_INIT_MAIN_THREAD 644 645 #define ELF_EXEC_PAGESIZE 4096 646 647 #define USE_ELF_CORE_DUMP 648 #define ELF_NREG 38 649 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 650 651 /* See linux kernel: arch/mips/kernel/process.c:elf_dump_regs. */ 652 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMBState *env) 653 { 654 int i, pos = 0; 655 656 for (i = 0; i < 32; i++) { 657 (*regs)[pos++] = tswapreg(env->regs[i]); 658 } 659 660 (*regs)[pos++] = tswapreg(env->pc); 661 (*regs)[pos++] = tswapreg(mb_cpu_read_msr(env)); 662 (*regs)[pos++] = 0; 663 (*regs)[pos++] = tswapreg(env->ear); 664 (*regs)[pos++] = 0; 665 (*regs)[pos++] = tswapreg(env->esr); 666 } 667 668 #endif /* TARGET_MICROBLAZE */ 669 670 #ifdef TARGET_OPENRISC 671 672 #define ELF_ARCH EM_OPENRISC 673 #define ELF_CLASS ELFCLASS32 674 #define ELF_DATA ELFDATA2MSB 675 676 static inline void init_thread(struct target_pt_regs *regs, 677 struct image_info *infop) 678 { 679 regs->pc = infop->entry; 680 regs->gpr[1] = infop->start_stack; 681 } 682 683 #define USE_ELF_CORE_DUMP 684 #define ELF_EXEC_PAGESIZE 8192 685 686 /* See linux kernel arch/openrisc/include/asm/elf.h. */ 687 #define ELF_NREG 34 /* gprs and pc, sr */ 688 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 689 690 static void elf_core_copy_regs(target_elf_gregset_t *regs, 691 const CPUOpenRISCState *env) 692 { 693 int i; 694 695 for (i = 0; i < 32; i++) { 696 (*regs)[i] = tswapreg(cpu_get_gpr(env, i)); 697 } 698 (*regs)[32] = tswapreg(env->pc); 699 (*regs)[33] = tswapreg(cpu_get_sr(env)); 700 } 701 702 #endif /* TARGET_OPENRISC */ 703 704 #ifdef TARGET_SH4 705 706 #define ELF_CLASS ELFCLASS32 707 #define ELF_ARCH EM_SH 708 709 static inline void init_thread(struct target_pt_regs *regs, 710 struct image_info *infop) 711 { 712 /* Check other registers XXXXX */ 713 regs->pc = infop->entry; 714 regs->regs[15] = infop->start_stack; 715 } 716 717 /* See linux kernel: arch/sh/include/asm/elf.h. */ 718 #define ELF_NREG 23 719 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 720 721 /* See linux kernel: arch/sh/include/asm/ptrace.h. */ 722 enum { 723 TARGET_REG_PC = 16, 724 TARGET_REG_PR = 17, 725 TARGET_REG_SR = 18, 726 TARGET_REG_GBR = 19, 727 TARGET_REG_MACH = 20, 728 TARGET_REG_MACL = 21, 729 TARGET_REG_SYSCALL = 22 730 }; 731 732 static inline void elf_core_copy_regs(target_elf_gregset_t *regs, 733 const CPUSH4State *env) 734 { 735 int i; 736 737 for (i = 0; i < 16; i++) { 738 (*regs)[i] = tswapreg(env->gregs[i]); 739 } 740 741 (*regs)[TARGET_REG_PC] = tswapreg(env->pc); 742 (*regs)[TARGET_REG_PR] = tswapreg(env->pr); 743 (*regs)[TARGET_REG_SR] = tswapreg(env->sr); 744 (*regs)[TARGET_REG_GBR] = tswapreg(env->gbr); 745 (*regs)[TARGET_REG_MACH] = tswapreg(env->mach); 746 (*regs)[TARGET_REG_MACL] = tswapreg(env->macl); 747 (*regs)[TARGET_REG_SYSCALL] = 0; /* FIXME */ 748 } 749 750 #define USE_ELF_CORE_DUMP 751 #define ELF_EXEC_PAGESIZE 4096 752 753 #endif 754 755 #ifdef TARGET_M68K 756 757 #define ELF_CLASS ELFCLASS32 758 #define ELF_ARCH EM_68K 759 760 /* ??? Does this need to do anything? 761 #define ELF_PLAT_INIT(_r) */ 762 763 static inline void init_thread(struct target_pt_regs *regs, 764 struct image_info *infop) 765 { 766 regs->usp = infop->start_stack; 767 regs->sr = 0; 768 regs->pc = infop->entry; 769 } 770 771 /* See linux kernel: arch/m68k/include/asm/elf.h. */ 772 #define ELF_NREG 20 773 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 774 775 static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUM68KState *env) 776 { 777 (*regs)[0] = tswapreg(env->dregs[1]); 778 (*regs)[1] = tswapreg(env->dregs[2]); 779 (*regs)[2] = tswapreg(env->dregs[3]); 780 (*regs)[3] = tswapreg(env->dregs[4]); 781 (*regs)[4] = tswapreg(env->dregs[5]); 782 (*regs)[5] = tswapreg(env->dregs[6]); 783 (*regs)[6] = tswapreg(env->dregs[7]); 784 (*regs)[7] = tswapreg(env->aregs[0]); 785 (*regs)[8] = tswapreg(env->aregs[1]); 786 (*regs)[9] = tswapreg(env->aregs[2]); 787 (*regs)[10] = tswapreg(env->aregs[3]); 788 (*regs)[11] = tswapreg(env->aregs[4]); 789 (*regs)[12] = tswapreg(env->aregs[5]); 790 (*regs)[13] = tswapreg(env->aregs[6]); 791 (*regs)[14] = tswapreg(env->dregs[0]); 792 (*regs)[15] = tswapreg(env->aregs[7]); 793 (*regs)[16] = tswapreg(env->dregs[0]); /* FIXME: orig_d0 */ 794 (*regs)[17] = tswapreg(env->sr); 795 (*regs)[18] = tswapreg(env->pc); 796 (*regs)[19] = 0; /* FIXME: regs->format | regs->vector */ 797 } 798 799 #define USE_ELF_CORE_DUMP 800 #define ELF_EXEC_PAGESIZE 8192 801 802 #endif 803 804 #ifdef TARGET_ALPHA 805 806 #define ELF_CLASS ELFCLASS64 807 #define ELF_ARCH EM_ALPHA 808 809 static inline void init_thread(struct target_pt_regs *regs, 810 struct image_info *infop) 811 { 812 regs->pc = infop->entry; 813 regs->ps = 8; 814 regs->usp = infop->start_stack; 815 } 816 817 #define ELF_EXEC_PAGESIZE 8192 818 819 #endif /* TARGET_ALPHA */ 820 821 #ifdef TARGET_S390X 822 823 #define ELF_CLASS ELFCLASS64 824 #define ELF_DATA ELFDATA2MSB 825 #define ELF_ARCH EM_S390 826 827 static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) 828 { 829 regs->psw.addr = infop->entry; 830 regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \ 831 PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \ 832 PSW_MASK_32; 833 regs->gprs[15] = infop->start_stack; 834 } 835 836 /* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs). */ 837 #define ELF_NREG 27 838 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 839 840 enum { 841 TARGET_REG_PSWM = 0, 842 TARGET_REG_PSWA = 1, 843 TARGET_REG_GPRS = 2, 844 TARGET_REG_ARS = 18, 845 TARGET_REG_ORIG_R2 = 26, 846 }; 847 848 static void elf_core_copy_regs(target_elf_gregset_t *regs, 849 const CPUS390XState *env) 850 { 851 int i; 852 uint32_t *aregs; 853 854 (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask); 855 (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr); 856 for (i = 0; i < 16; i++) { 857 (*regs)[TARGET_REG_GPRS + i] = tswapreg(env->regs[i]); 858 } 859 aregs = (uint32_t *)&((*regs)[TARGET_REG_ARS]); 860 for (i = 0; i < 16; i++) { 861 aregs[i] = tswap32(env->aregs[i]); 862 } 863 (*regs)[TARGET_REG_ORIG_R2] = 0; 864 } 865 866 #define USE_ELF_CORE_DUMP 867 #define ELF_EXEC_PAGESIZE 4096 868 869 #define VDSO_HEADER "vdso.c.inc" 870 871 #endif /* TARGET_S390X */ 872 873 #ifdef TARGET_RISCV 874 875 #define ELF_ARCH EM_RISCV 876 877 #ifdef TARGET_RISCV32 878 #define ELF_CLASS ELFCLASS32 879 #define VDSO_HEADER "vdso-32.c.inc" 880 #else 881 #define ELF_CLASS ELFCLASS64 882 #define VDSO_HEADER "vdso-64.c.inc" 883 #endif 884 885 static inline void init_thread(struct target_pt_regs *regs, 886 struct image_info *infop) 887 { 888 regs->sepc = infop->entry; 889 regs->sp = infop->start_stack; 890 } 891 892 #define ELF_EXEC_PAGESIZE 4096 893 894 #endif /* TARGET_RISCV */ 895 896 #ifdef TARGET_HPPA 897 898 #define ELF_CLASS ELFCLASS32 899 #define ELF_ARCH EM_PARISC 900 #define STACK_GROWS_DOWN 0 901 #define STACK_ALIGNMENT 64 902 903 #define VDSO_HEADER "vdso.c.inc" 904 905 static inline void init_thread(struct target_pt_regs *regs, 906 struct image_info *infop) 907 { 908 regs->iaoq[0] = infop->entry | PRIV_USER; 909 regs->iaoq[1] = regs->iaoq[0] + 4; 910 regs->gr[23] = 0; 911 regs->gr[24] = infop->argv; 912 regs->gr[25] = infop->argc; 913 /* The top-of-stack contains a linkage buffer. */ 914 regs->gr[30] = infop->start_stack + 64; 915 regs->gr[31] = infop->entry; 916 } 917 918 #define LO_COMMPAGE 0 919 920 static bool init_guest_commpage(void) 921 { 922 /* If reserved_va, then we have already mapped 0 page on the host. */ 923 if (!reserved_va) { 924 void *want, *addr; 925 926 want = g2h_untagged(LO_COMMPAGE); 927 addr = mmap(want, TARGET_PAGE_SIZE, PROT_NONE, 928 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); 929 if (addr == MAP_FAILED) { 930 perror("Allocating guest commpage"); 931 exit(EXIT_FAILURE); 932 } 933 if (addr != want) { 934 return false; 935 } 936 } 937 938 /* 939 * On Linux, page zero is normally marked execute only + gateway. 940 * Normal read or write is supposed to fail (thus PROT_NONE above), 941 * but specific offsets have kernel code mapped to raise permissions 942 * and implement syscalls. Here, simply mark the page executable. 943 * Special case the entry points during translation (see do_page_zero). 944 */ 945 page_set_flags(LO_COMMPAGE, LO_COMMPAGE | ~TARGET_PAGE_MASK, 946 PAGE_EXEC | PAGE_VALID); 947 return true; 948 } 949 950 #endif /* TARGET_HPPA */ 951 952 #ifdef TARGET_XTENSA 953 954 #define ELF_CLASS ELFCLASS32 955 #define ELF_ARCH EM_XTENSA 956 957 static inline void init_thread(struct target_pt_regs *regs, 958 struct image_info *infop) 959 { 960 regs->windowbase = 0; 961 regs->windowstart = 1; 962 regs->areg[1] = infop->start_stack; 963 regs->pc = infop->entry; 964 if (info_is_fdpic(infop)) { 965 regs->areg[4] = infop->loadmap_addr; 966 regs->areg[5] = infop->interpreter_loadmap_addr; 967 if (infop->interpreter_loadmap_addr) { 968 regs->areg[6] = infop->interpreter_pt_dynamic_addr; 969 } else { 970 regs->areg[6] = infop->pt_dynamic_addr; 971 } 972 } 973 } 974 975 /* See linux kernel: arch/xtensa/include/asm/elf.h. */ 976 #define ELF_NREG 128 977 typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; 978 979 enum { 980 TARGET_REG_PC, 981 TARGET_REG_PS, 982 TARGET_REG_LBEG, 983 TARGET_REG_LEND, 984 TARGET_REG_LCOUNT, 985 TARGET_REG_SAR, 986 TARGET_REG_WINDOWSTART, 987 TARGET_REG_WINDOWBASE, 988 TARGET_REG_THREADPTR, 989 TARGET_REG_AR0 = 64, 990 }; 991 992 static void elf_core_copy_regs(target_elf_gregset_t *regs, 993 const CPUXtensaState *env) 994 { 995 unsigned i; 996 997 (*regs)[TARGET_REG_PC] = tswapreg(env->pc); 998 (*regs)[TARGET_REG_PS] = tswapreg(env->sregs[PS] & ~PS_EXCM); 999 (*regs)[TARGET_REG_LBEG] = tswapreg(env->sregs[LBEG]); 1000 (*regs)[TARGET_REG_LEND] = tswapreg(env->sregs[LEND]); 1001 (*regs)[TARGET_REG_LCOUNT] = tswapreg(env->sregs[LCOUNT]); 1002 (*regs)[TARGET_REG_SAR] = tswapreg(env->sregs[SAR]); 1003 (*regs)[TARGET_REG_WINDOWSTART] = tswapreg(env->sregs[WINDOW_START]); 1004 (*regs)[TARGET_REG_WINDOWBASE] = tswapreg(env->sregs[WINDOW_BASE]); 1005 (*regs)[TARGET_REG_THREADPTR] = tswapreg(env->uregs[THREADPTR]); 1006 xtensa_sync_phys_from_window((CPUXtensaState *)env); 1007 for (i = 0; i < env->config->nareg; ++i) { 1008 (*regs)[TARGET_REG_AR0 + i] = tswapreg(env->phys_regs[i]); 1009 } 1010 } 1011 1012 #define USE_ELF_CORE_DUMP 1013 #define ELF_EXEC_PAGESIZE 4096 1014 1015 #endif /* TARGET_XTENSA */ 1016 1017 #ifdef TARGET_HEXAGON 1018 1019 #define ELF_CLASS ELFCLASS32 1020 #define ELF_ARCH EM_HEXAGON 1021 1022 static inline void init_thread(struct target_pt_regs *regs, 1023 struct image_info *infop) 1024 { 1025 regs->sepc = infop->entry; 1026 regs->sp = infop->start_stack; 1027 } 1028 1029 #endif /* TARGET_HEXAGON */ 1030 1031 #ifndef ELF_MACHINE 1032 #define ELF_MACHINE ELF_ARCH 1033 #endif 1034 1035 #ifndef elf_check_arch 1036 #define elf_check_arch(x) ((x) == ELF_ARCH) 1037 #endif 1038 1039 #ifndef elf_check_abi 1040 #define elf_check_abi(x) (1) 1041 #endif 1042 1043 #ifndef STACK_GROWS_DOWN 1044 #define STACK_GROWS_DOWN 1 1045 #endif 1046 1047 #ifndef STACK_ALIGNMENT 1048 #define STACK_ALIGNMENT 16 1049 #endif 1050 1051 #ifdef TARGET_ABI32 1052 #undef ELF_CLASS 1053 #define ELF_CLASS ELFCLASS32 1054 #undef bswaptls 1055 #define bswaptls(ptr) bswap32s(ptr) 1056 #endif 1057 1058 #ifndef EXSTACK_DEFAULT 1059 #define EXSTACK_DEFAULT false 1060 #endif 1061 1062 /* 1063 * Provide fallback definitions that the target may omit. 1064 * One way or another, we'll get a link error if the setting of 1065 * HAVE_* doesn't match the implementation. 1066 */ 1067 #ifndef HAVE_ELF_HWCAP 1068 abi_ulong get_elf_hwcap(CPUState *cs) { return 0; } 1069 #endif 1070 #ifndef HAVE_ELF_HWCAP2 1071 abi_ulong get_elf_hwcap2(CPUState *cs) { g_assert_not_reached(); } 1072 #define HAVE_ELF_HWCAP2 0 1073 #endif 1074 #ifndef HAVE_ELF_PLATFORM 1075 const char *get_elf_platform(CPUState *cs) { return NULL; } 1076 #endif 1077 #ifndef HAVE_ELF_BASE_PLATFORM 1078 const char *get_elf_base_platform(CPUState *cs) { return NULL; } 1079 #endif 1080 1081 #include "elf.h" 1082 1083 /* We must delay the following stanzas until after "elf.h". */ 1084 #if defined(TARGET_AARCH64) 1085 1086 static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz, 1087 const uint32_t *data, 1088 struct image_info *info, 1089 Error **errp) 1090 { 1091 if (pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { 1092 if (pr_datasz != sizeof(uint32_t)) { 1093 error_setg(errp, "Ill-formed GNU_PROPERTY_AARCH64_FEATURE_1_AND"); 1094 return false; 1095 } 1096 /* We will extract GNU_PROPERTY_AARCH64_FEATURE_1_BTI later. */ 1097 info->note_flags = *data; 1098 } 1099 return true; 1100 } 1101 #define ARCH_USE_GNU_PROPERTY 1 1102 1103 #else 1104 1105 static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz, 1106 const uint32_t *data, 1107 struct image_info *info, 1108 Error **errp) 1109 { 1110 g_assert_not_reached(); 1111 } 1112 #define ARCH_USE_GNU_PROPERTY 0 1113 1114 #endif 1115 1116 struct exec 1117 { 1118 unsigned int a_info; /* Use macros N_MAGIC, etc for access */ 1119 unsigned int a_text; /* length of text, in bytes */ 1120 unsigned int a_data; /* length of data, in bytes */ 1121 unsigned int a_bss; /* length of uninitialized data area, in bytes */ 1122 unsigned int a_syms; /* length of symbol table data in file, in bytes */ 1123 unsigned int a_entry; /* start address */ 1124 unsigned int a_trsize; /* length of relocation info for text, in bytes */ 1125 unsigned int a_drsize; /* length of relocation info for data, in bytes */ 1126 }; 1127 1128 1129 #define N_MAGIC(exec) ((exec).a_info & 0xffff) 1130 #define OMAGIC 0407 1131 #define NMAGIC 0410 1132 #define ZMAGIC 0413 1133 #define QMAGIC 0314 1134 1135 #define DLINFO_ITEMS 16 1136 1137 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) 1138 { 1139 memcpy(to, from, n); 1140 } 1141 1142 static void bswap_ehdr(struct elfhdr *ehdr) 1143 { 1144 if (!target_needs_bswap()) { 1145 return; 1146 } 1147 1148 bswap16s(&ehdr->e_type); /* Object file type */ 1149 bswap16s(&ehdr->e_machine); /* Architecture */ 1150 bswap32s(&ehdr->e_version); /* Object file version */ 1151 bswaptls(&ehdr->e_entry); /* Entry point virtual address */ 1152 bswaptls(&ehdr->e_phoff); /* Program header table file offset */ 1153 bswaptls(&ehdr->e_shoff); /* Section header table file offset */ 1154 bswap32s(&ehdr->e_flags); /* Processor-specific flags */ 1155 bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */ 1156 bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ 1157 bswap16s(&ehdr->e_phnum); /* Program header table entry count */ 1158 bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ 1159 bswap16s(&ehdr->e_shnum); /* Section header table entry count */ 1160 bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ 1161 } 1162 1163 static void bswap_phdr(struct elf_phdr *phdr, int phnum) 1164 { 1165 if (!target_needs_bswap()) { 1166 return; 1167 } 1168 1169 for (int i = 0; i < phnum; ++i, ++phdr) { 1170 bswap32s(&phdr->p_type); /* Segment type */ 1171 bswap32s(&phdr->p_flags); /* Segment flags */ 1172 bswaptls(&phdr->p_offset); /* Segment file offset */ 1173 bswaptls(&phdr->p_vaddr); /* Segment virtual address */ 1174 bswaptls(&phdr->p_paddr); /* Segment physical address */ 1175 bswaptls(&phdr->p_filesz); /* Segment size in file */ 1176 bswaptls(&phdr->p_memsz); /* Segment size in memory */ 1177 bswaptls(&phdr->p_align); /* Segment alignment */ 1178 } 1179 } 1180 1181 static void bswap_shdr(struct elf_shdr *shdr, int shnum) 1182 { 1183 if (!target_needs_bswap()) { 1184 return; 1185 } 1186 1187 for (int i = 0; i < shnum; ++i, ++shdr) { 1188 bswap32s(&shdr->sh_name); 1189 bswap32s(&shdr->sh_type); 1190 bswaptls(&shdr->sh_flags); 1191 bswaptls(&shdr->sh_addr); 1192 bswaptls(&shdr->sh_offset); 1193 bswaptls(&shdr->sh_size); 1194 bswap32s(&shdr->sh_link); 1195 bswap32s(&shdr->sh_info); 1196 bswaptls(&shdr->sh_addralign); 1197 bswaptls(&shdr->sh_entsize); 1198 } 1199 } 1200 1201 static void bswap_sym(struct elf_sym *sym) 1202 { 1203 if (!target_needs_bswap()) { 1204 return; 1205 } 1206 1207 bswap32s(&sym->st_name); 1208 bswaptls(&sym->st_value); 1209 bswaptls(&sym->st_size); 1210 bswap16s(&sym->st_shndx); 1211 } 1212 1213 #ifdef TARGET_MIPS 1214 static void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags) 1215 { 1216 if (!target_needs_bswap()) { 1217 return; 1218 } 1219 1220 bswap16s(&abiflags->version); 1221 bswap32s(&abiflags->ases); 1222 bswap32s(&abiflags->isa_ext); 1223 bswap32s(&abiflags->flags1); 1224 bswap32s(&abiflags->flags2); 1225 } 1226 #endif 1227 1228 #ifdef USE_ELF_CORE_DUMP 1229 static int elf_core_dump(int, const CPUArchState *); 1230 #endif /* USE_ELF_CORE_DUMP */ 1231 static void load_symbols(struct elfhdr *hdr, const ImageSource *src, 1232 abi_ulong load_bias); 1233 1234 /* Verify the portions of EHDR within E_IDENT for the target. 1235 This can be performed before bswapping the entire header. */ 1236 static bool elf_check_ident(struct elfhdr *ehdr) 1237 { 1238 return (ehdr->e_ident[EI_MAG0] == ELFMAG0 1239 && ehdr->e_ident[EI_MAG1] == ELFMAG1 1240 && ehdr->e_ident[EI_MAG2] == ELFMAG2 1241 && ehdr->e_ident[EI_MAG3] == ELFMAG3 1242 && ehdr->e_ident[EI_CLASS] == ELF_CLASS 1243 && ehdr->e_ident[EI_DATA] == ELF_DATA 1244 && ehdr->e_ident[EI_VERSION] == EV_CURRENT); 1245 } 1246 1247 /* Verify the portions of EHDR outside of E_IDENT for the target. 1248 This has to wait until after bswapping the header. */ 1249 static bool elf_check_ehdr(struct elfhdr *ehdr) 1250 { 1251 return (elf_check_arch(ehdr->e_machine) 1252 && elf_check_abi(ehdr->e_flags) 1253 && ehdr->e_ehsize == sizeof(struct elfhdr) 1254 && ehdr->e_phentsize == sizeof(struct elf_phdr) 1255 && (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN)); 1256 } 1257 1258 /* 1259 * 'copy_elf_strings()' copies argument/envelope strings from user 1260 * memory to free pages in kernel mem. These are in a format ready 1261 * to be put directly into the top of new user memory. 1262 * 1263 */ 1264 static abi_ulong copy_elf_strings(int argc, char **argv, char *scratch, 1265 abi_ulong p, abi_ulong stack_limit) 1266 { 1267 char *tmp; 1268 int len, i; 1269 abi_ulong top = p; 1270 1271 if (!p) { 1272 return 0; /* bullet-proofing */ 1273 } 1274 1275 if (STACK_GROWS_DOWN) { 1276 int offset = ((p - 1) % TARGET_PAGE_SIZE) + 1; 1277 for (i = argc - 1; i >= 0; --i) { 1278 tmp = argv[i]; 1279 if (!tmp) { 1280 fprintf(stderr, "VFS: argc is wrong"); 1281 exit(-1); 1282 } 1283 len = strlen(tmp) + 1; 1284 tmp += len; 1285 1286 if (len > (p - stack_limit)) { 1287 return 0; 1288 } 1289 while (len) { 1290 int bytes_to_copy = (len > offset) ? offset : len; 1291 tmp -= bytes_to_copy; 1292 p -= bytes_to_copy; 1293 offset -= bytes_to_copy; 1294 len -= bytes_to_copy; 1295 1296 memcpy_fromfs(scratch + offset, tmp, bytes_to_copy); 1297 1298 if (offset == 0) { 1299 memcpy_to_target(p, scratch, top - p); 1300 top = p; 1301 offset = TARGET_PAGE_SIZE; 1302 } 1303 } 1304 } 1305 if (p != top) { 1306 memcpy_to_target(p, scratch + offset, top - p); 1307 } 1308 } else { 1309 int remaining = TARGET_PAGE_SIZE - (p % TARGET_PAGE_SIZE); 1310 for (i = 0; i < argc; ++i) { 1311 tmp = argv[i]; 1312 if (!tmp) { 1313 fprintf(stderr, "VFS: argc is wrong"); 1314 exit(-1); 1315 } 1316 len = strlen(tmp) + 1; 1317 if (len > (stack_limit - p)) { 1318 return 0; 1319 } 1320 while (len) { 1321 int bytes_to_copy = (len > remaining) ? remaining : len; 1322 1323 memcpy_fromfs(scratch + (p - top), tmp, bytes_to_copy); 1324 1325 tmp += bytes_to_copy; 1326 remaining -= bytes_to_copy; 1327 p += bytes_to_copy; 1328 len -= bytes_to_copy; 1329 1330 if (remaining == 0) { 1331 memcpy_to_target(top, scratch, p - top); 1332 top = p; 1333 remaining = TARGET_PAGE_SIZE; 1334 } 1335 } 1336 } 1337 if (p != top) { 1338 memcpy_to_target(top, scratch, p - top); 1339 } 1340 } 1341 1342 return p; 1343 } 1344 1345 /* Older linux kernels provide up to MAX_ARG_PAGES (default: 32) of 1346 * argument/environment space. Newer kernels (>2.6.33) allow more, 1347 * dependent on stack size, but guarantee at least 32 pages for 1348 * backwards compatibility. 1349 */ 1350 #define STACK_LOWER_LIMIT (32 * TARGET_PAGE_SIZE) 1351 1352 static abi_ulong setup_arg_pages(struct linux_binprm *bprm, 1353 struct image_info *info) 1354 { 1355 abi_ulong size, error, guard; 1356 int prot; 1357 1358 size = guest_stack_size; 1359 if (size < STACK_LOWER_LIMIT) { 1360 size = STACK_LOWER_LIMIT; 1361 } 1362 1363 if (STACK_GROWS_DOWN) { 1364 guard = TARGET_PAGE_SIZE; 1365 if (guard < qemu_real_host_page_size()) { 1366 guard = qemu_real_host_page_size(); 1367 } 1368 } else { 1369 /* no guard page for hppa target where stack grows upwards. */ 1370 guard = 0; 1371 } 1372 1373 prot = PROT_READ | PROT_WRITE; 1374 if (info->exec_stack) { 1375 prot |= PROT_EXEC; 1376 } 1377 error = target_mmap(0, size + guard, prot, 1378 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1379 if (error == -1) { 1380 perror("mmap stack"); 1381 exit(-1); 1382 } 1383 1384 /* We reserve one extra page at the top of the stack as guard. */ 1385 if (STACK_GROWS_DOWN) { 1386 target_mprotect(error, guard, PROT_NONE); 1387 info->stack_limit = error + guard; 1388 return info->stack_limit + size - sizeof(void *); 1389 } else { 1390 info->stack_limit = error + size; 1391 return error; 1392 } 1393 } 1394 1395 /** 1396 * zero_bss: 1397 * 1398 * Map and zero the bss. We need to explicitly zero any fractional pages 1399 * after the data section (i.e. bss). Return false on mapping failure. 1400 */ 1401 static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, 1402 int prot, Error **errp) 1403 { 1404 abi_ulong align_bss; 1405 1406 /* We only expect writable bss; the code segment shouldn't need this. */ 1407 if (!(prot & PROT_WRITE)) { 1408 error_setg(errp, "PT_LOAD with non-writable bss"); 1409 return false; 1410 } 1411 1412 align_bss = TARGET_PAGE_ALIGN(start_bss); 1413 end_bss = TARGET_PAGE_ALIGN(end_bss); 1414 1415 if (start_bss < align_bss) { 1416 int flags = page_get_flags(start_bss); 1417 1418 if (!(flags & PAGE_RWX)) { 1419 /* 1420 * The whole address space of the executable was reserved 1421 * at the start, therefore all pages will be VALID. 1422 * But assuming there are no PROT_NONE PT_LOAD segments, 1423 * a PROT_NONE page means no data all bss, and we can 1424 * simply extend the new anon mapping back to the start 1425 * of the page of bss. 1426 */ 1427 align_bss -= TARGET_PAGE_SIZE; 1428 } else { 1429 /* 1430 * The start of the bss shares a page with something. 1431 * The only thing that we expect is the data section, 1432 * which would already be marked writable. 1433 * Overlapping the RX code segment seems malformed. 1434 */ 1435 if (!(flags & PAGE_WRITE)) { 1436 error_setg(errp, "PT_LOAD with bss overlapping " 1437 "non-writable page"); 1438 return false; 1439 } 1440 1441 /* The page is already mapped and writable. */ 1442 memset(g2h_untagged(start_bss), 0, align_bss - start_bss); 1443 } 1444 } 1445 1446 if (align_bss < end_bss && 1447 target_mmap(align_bss, end_bss - align_bss, prot, 1448 MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { 1449 error_setg_errno(errp, errno, "Error mapping bss"); 1450 return false; 1451 } 1452 return true; 1453 } 1454 1455 #if defined(TARGET_ARM) 1456 static int elf_is_fdpic(struct elfhdr *exec) 1457 { 1458 return exec->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC; 1459 } 1460 #elif defined(TARGET_XTENSA) 1461 static int elf_is_fdpic(struct elfhdr *exec) 1462 { 1463 return exec->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC; 1464 } 1465 #else 1466 /* Default implementation, always false. */ 1467 static int elf_is_fdpic(struct elfhdr *exec) 1468 { 1469 return 0; 1470 } 1471 #endif 1472 1473 static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong sp) 1474 { 1475 uint16_t n; 1476 struct elf32_fdpic_loadseg *loadsegs = info->loadsegs; 1477 1478 /* elf32_fdpic_loadseg */ 1479 n = info->nsegs; 1480 while (n--) { 1481 sp -= 12; 1482 put_user_u32(loadsegs[n].addr, sp+0); 1483 put_user_u32(loadsegs[n].p_vaddr, sp+4); 1484 put_user_u32(loadsegs[n].p_memsz, sp+8); 1485 } 1486 1487 /* elf32_fdpic_loadmap */ 1488 sp -= 4; 1489 put_user_u16(0, sp+0); /* version */ 1490 put_user_u16(info->nsegs, sp+2); /* nsegs */ 1491 1492 info->personality = PER_LINUX_FDPIC; 1493 info->loadmap_addr = sp; 1494 1495 return sp; 1496 } 1497 1498 static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, 1499 struct elfhdr *exec, 1500 struct image_info *info, 1501 struct image_info *interp_info, 1502 struct image_info *vdso_info) 1503 { 1504 abi_ulong sp; 1505 abi_ulong u_argc, u_argv, u_envp, u_auxv; 1506 int size; 1507 int i; 1508 abi_ulong u_rand_bytes; 1509 uint8_t k_rand_bytes[16]; 1510 abi_ulong u_platform, u_base_platform; 1511 const char *k_platform, *k_base_platform; 1512 const int n = sizeof(elf_addr_t); 1513 1514 sp = p; 1515 1516 /* Needs to be before we load the env/argc/... */ 1517 if (elf_is_fdpic(exec)) { 1518 /* Need 4 byte alignment for these structs */ 1519 sp &= ~3; 1520 sp = loader_build_fdpic_loadmap(info, sp); 1521 info->other_info = interp_info; 1522 if (interp_info) { 1523 interp_info->other_info = info; 1524 sp = loader_build_fdpic_loadmap(interp_info, sp); 1525 info->interpreter_loadmap_addr = interp_info->loadmap_addr; 1526 info->interpreter_pt_dynamic_addr = interp_info->pt_dynamic_addr; 1527 } else { 1528 info->interpreter_loadmap_addr = 0; 1529 info->interpreter_pt_dynamic_addr = 0; 1530 } 1531 } 1532 1533 u_base_platform = 0; 1534 k_base_platform = get_elf_base_platform(thread_cpu); 1535 if (k_base_platform) { 1536 size_t len = strlen(k_base_platform) + 1; 1537 if (STACK_GROWS_DOWN) { 1538 sp -= (len + n - 1) & ~(n - 1); 1539 u_base_platform = sp; 1540 /* FIXME - check return value of memcpy_to_target() for failure */ 1541 memcpy_to_target(sp, k_base_platform, len); 1542 } else { 1543 memcpy_to_target(sp, k_base_platform, len); 1544 u_base_platform = sp; 1545 sp += len + 1; 1546 } 1547 } 1548 1549 u_platform = 0; 1550 k_platform = get_elf_platform(thread_cpu); 1551 if (k_platform) { 1552 size_t len = strlen(k_platform) + 1; 1553 if (STACK_GROWS_DOWN) { 1554 sp -= (len + n - 1) & ~(n - 1); 1555 u_platform = sp; 1556 /* FIXME - check return value of memcpy_to_target() for failure */ 1557 memcpy_to_target(sp, k_platform, len); 1558 } else { 1559 memcpy_to_target(sp, k_platform, len); 1560 u_platform = sp; 1561 sp += len + 1; 1562 } 1563 } 1564 1565 /* Provide 16 byte alignment for the PRNG, and basic alignment for 1566 * the argv and envp pointers. 1567 */ 1568 if (STACK_GROWS_DOWN) { 1569 sp = QEMU_ALIGN_DOWN(sp, 16); 1570 } else { 1571 sp = QEMU_ALIGN_UP(sp, 16); 1572 } 1573 1574 /* 1575 * Generate 16 random bytes for userspace PRNG seeding. 1576 */ 1577 qemu_guest_getrandom_nofail(k_rand_bytes, sizeof(k_rand_bytes)); 1578 if (STACK_GROWS_DOWN) { 1579 sp -= 16; 1580 u_rand_bytes = sp; 1581 /* FIXME - check return value of memcpy_to_target() for failure */ 1582 memcpy_to_target(sp, k_rand_bytes, 16); 1583 } else { 1584 memcpy_to_target(sp, k_rand_bytes, 16); 1585 u_rand_bytes = sp; 1586 sp += 16; 1587 } 1588 1589 size = (DLINFO_ITEMS + 1) * 2; 1590 if (k_base_platform) { 1591 size += 2; 1592 } 1593 if (k_platform) { 1594 size += 2; 1595 } 1596 if (vdso_info) { 1597 size += 2; 1598 } 1599 #ifdef DLINFO_ARCH_ITEMS 1600 size += DLINFO_ARCH_ITEMS * 2; 1601 #endif 1602 if (HAVE_ELF_HWCAP2) { 1603 size += 2; 1604 } 1605 info->auxv_len = size * n; 1606 1607 size += envc + argc + 2; 1608 size += 1; /* argc itself */ 1609 size *= n; 1610 1611 /* Allocate space and finalize stack alignment for entry now. */ 1612 if (STACK_GROWS_DOWN) { 1613 u_argc = QEMU_ALIGN_DOWN(sp - size, STACK_ALIGNMENT); 1614 sp = u_argc; 1615 } else { 1616 u_argc = sp; 1617 sp = QEMU_ALIGN_UP(sp + size, STACK_ALIGNMENT); 1618 } 1619 1620 u_argv = u_argc + n; 1621 u_envp = u_argv + (argc + 1) * n; 1622 u_auxv = u_envp + (envc + 1) * n; 1623 info->saved_auxv = u_auxv; 1624 info->argc = argc; 1625 info->envc = envc; 1626 info->argv = u_argv; 1627 info->envp = u_envp; 1628 1629 /* This is correct because Linux defines 1630 * elf_addr_t as Elf32_Off / Elf64_Off 1631 */ 1632 #define NEW_AUX_ENT(id, val) do { \ 1633 put_user_ual(id, u_auxv); u_auxv += n; \ 1634 put_user_ual(val, u_auxv); u_auxv += n; \ 1635 } while(0) 1636 1637 #ifdef ARCH_DLINFO 1638 /* 1639 * ARCH_DLINFO must come first so platform specific code can enforce 1640 * special alignment requirements on the AUXV if necessary (eg. PPC). 1641 */ 1642 ARCH_DLINFO; 1643 #endif 1644 /* There must be exactly DLINFO_ITEMS entries here, or the assert 1645 * on info->auxv_len will trigger. 1646 */ 1647 NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff)); 1648 NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); 1649 NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); 1650 NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); 1651 NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? interp_info->load_addr : 0)); 1652 NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); 1653 NEW_AUX_ENT(AT_ENTRY, info->entry); 1654 NEW_AUX_ENT(AT_UID, (abi_ulong) getuid()); 1655 NEW_AUX_ENT(AT_EUID, (abi_ulong) geteuid()); 1656 NEW_AUX_ENT(AT_GID, (abi_ulong) getgid()); 1657 NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid()); 1658 NEW_AUX_ENT(AT_HWCAP, get_elf_hwcap(thread_cpu)); 1659 NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); 1660 NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes); 1661 NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE)); 1662 NEW_AUX_ENT(AT_EXECFN, info->file_string); 1663 1664 if (HAVE_ELF_HWCAP2) { 1665 NEW_AUX_ENT(AT_HWCAP2, get_elf_hwcap(thread_cpu)); 1666 } 1667 if (u_base_platform) { 1668 NEW_AUX_ENT(AT_BASE_PLATFORM, u_base_platform); 1669 } 1670 if (u_platform) { 1671 NEW_AUX_ENT(AT_PLATFORM, u_platform); 1672 } 1673 if (vdso_info) { 1674 NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr); 1675 } 1676 NEW_AUX_ENT (AT_NULL, 0); 1677 #undef NEW_AUX_ENT 1678 1679 /* Check that our initial calculation of the auxv length matches how much 1680 * we actually put into it. 1681 */ 1682 assert(info->auxv_len == u_auxv - info->saved_auxv); 1683 1684 put_user_ual(argc, u_argc); 1685 1686 p = info->arg_strings; 1687 for (i = 0; i < argc; ++i) { 1688 put_user_ual(p, u_argv); 1689 u_argv += n; 1690 p += target_strlen(p) + 1; 1691 } 1692 put_user_ual(0, u_argv); 1693 1694 p = info->env_strings; 1695 for (i = 0; i < envc; ++i) { 1696 put_user_ual(p, u_envp); 1697 u_envp += n; 1698 p += target_strlen(p) + 1; 1699 } 1700 put_user_ual(0, u_envp); 1701 1702 return sp; 1703 } 1704 1705 #if defined(HI_COMMPAGE) 1706 #define LO_COMMPAGE -1 1707 #elif defined(LO_COMMPAGE) 1708 #define HI_COMMPAGE 0 1709 #else 1710 #define HI_COMMPAGE 0 1711 #define LO_COMMPAGE -1 1712 #ifndef INIT_GUEST_COMMPAGE 1713 #define init_guest_commpage() true 1714 #endif 1715 #endif 1716 1717 /** 1718 * pgb_try_mmap: 1719 * @addr: host start address 1720 * @addr_last: host last address 1721 * @keep: do not unmap the probe region 1722 * 1723 * Return 1 if [@addr, @addr_last] is not mapped in the host, 1724 * return 0 if it is not available to map, and -1 on mmap error. 1725 * If @keep, the region is left mapped on success, otherwise unmapped. 1726 */ 1727 static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep) 1728 { 1729 size_t size = addr_last - addr + 1; 1730 void *p = mmap((void *)addr, size, PROT_NONE, 1731 MAP_ANONYMOUS | MAP_PRIVATE | 1732 MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0); 1733 int ret; 1734 1735 if (p == MAP_FAILED) { 1736 return errno == EEXIST ? 0 : -1; 1737 } 1738 ret = p == (void *)addr; 1739 if (!keep || !ret) { 1740 munmap(p, size); 1741 } 1742 return ret; 1743 } 1744 1745 /** 1746 * pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t size, uintptr_t brk) 1747 * @addr: host address 1748 * @addr_last: host last address 1749 * @brk: host brk 1750 * 1751 * Like pgb_try_mmap, but additionally reserve some memory following brk. 1752 */ 1753 static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last, 1754 uintptr_t brk, bool keep) 1755 { 1756 uintptr_t brk_last = brk + 16 * MiB - 1; 1757 1758 /* Do not map anything close to the host brk. */ 1759 if (addr <= brk_last && brk <= addr_last) { 1760 return 0; 1761 } 1762 return pgb_try_mmap(addr, addr_last, keep); 1763 } 1764 1765 /** 1766 * pgb_try_mmap_set: 1767 * @ga: set of guest addrs 1768 * @base: guest_base 1769 * @brk: host brk 1770 * 1771 * Return true if all @ga can be mapped by the host at @base. 1772 * On success, retain the mapping at index 0 for reserved_va. 1773 */ 1774 1775 typedef struct PGBAddrs { 1776 uintptr_t bounds[3][2]; /* start/last pairs */ 1777 int nbounds; 1778 } PGBAddrs; 1779 1780 static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk) 1781 { 1782 for (int i = ga->nbounds - 1; i >= 0; --i) { 1783 if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base, 1784 ga->bounds[i][1] + base, 1785 brk, i == 0 && reserved_va) <= 0) { 1786 return false; 1787 } 1788 } 1789 return true; 1790 } 1791 1792 /** 1793 * pgb_addr_set: 1794 * @ga: output set of guest addrs 1795 * @guest_loaddr: guest image low address 1796 * @guest_loaddr: guest image high address 1797 * @identity: create for identity mapping 1798 * 1799 * Fill in @ga with the image, COMMPAGE and NULL page. 1800 */ 1801 static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr, 1802 abi_ulong guest_hiaddr, bool try_identity) 1803 { 1804 int n; 1805 1806 /* 1807 * With a low commpage, or a guest mapped very low, 1808 * we may not be able to use the identity map. 1809 */ 1810 if (try_identity) { 1811 if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) { 1812 return false; 1813 } 1814 if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) { 1815 return false; 1816 } 1817 } 1818 1819 memset(ga, 0, sizeof(*ga)); 1820 n = 0; 1821 1822 if (reserved_va) { 1823 ga->bounds[n][0] = try_identity ? mmap_min_addr : 0; 1824 ga->bounds[n][1] = reserved_va; 1825 n++; 1826 /* LO_COMMPAGE and NULL handled by reserving from 0. */ 1827 } else { 1828 /* Add any LO_COMMPAGE or NULL page. */ 1829 if (LO_COMMPAGE != -1) { 1830 ga->bounds[n][0] = 0; 1831 ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1; 1832 n++; 1833 } else if (!try_identity) { 1834 ga->bounds[n][0] = 0; 1835 ga->bounds[n][1] = TARGET_PAGE_SIZE - 1; 1836 n++; 1837 } 1838 1839 /* Add the guest image for ET_EXEC. */ 1840 if (guest_loaddr) { 1841 ga->bounds[n][0] = guest_loaddr; 1842 ga->bounds[n][1] = guest_hiaddr; 1843 n++; 1844 } 1845 } 1846 1847 /* 1848 * Temporarily disable 1849 * "comparison is always false due to limited range of data type" 1850 * due to comparison between unsigned and (possible) 0. 1851 */ 1852 #pragma GCC diagnostic push 1853 #pragma GCC diagnostic ignored "-Wtype-limits" 1854 1855 /* Add any HI_COMMPAGE not covered by reserved_va. */ 1856 if (reserved_va < HI_COMMPAGE) { 1857 ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask(); 1858 ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1; 1859 n++; 1860 } 1861 1862 #pragma GCC diagnostic pop 1863 1864 ga->nbounds = n; 1865 return true; 1866 } 1867 1868 static void pgb_fail_in_use(const char *image_name) 1869 { 1870 error_report("%s: requires virtual address space that is in use " 1871 "(omit the -B option or choose a different value)", 1872 image_name); 1873 exit(EXIT_FAILURE); 1874 } 1875 1876 static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr, 1877 uintptr_t guest_hiaddr, uintptr_t align) 1878 { 1879 PGBAddrs ga; 1880 uintptr_t brk = (uintptr_t)sbrk(0); 1881 1882 if (!QEMU_IS_ALIGNED(guest_base, align)) { 1883 fprintf(stderr, "Requested guest base %p does not satisfy " 1884 "host minimum alignment (0x%" PRIxPTR ")\n", 1885 (void *)guest_base, align); 1886 exit(EXIT_FAILURE); 1887 } 1888 1889 if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base) 1890 || !pgb_try_mmap_set(&ga, guest_base, brk)) { 1891 pgb_fail_in_use(image_name); 1892 } 1893 } 1894 1895 /** 1896 * pgb_find_fallback: 1897 * 1898 * This is a fallback method for finding holes in the host address space 1899 * if we don't have the benefit of being able to access /proc/self/map. 1900 * It can potentially take a very long time as we can only dumbly iterate 1901 * up the host address space seeing if the allocation would work. 1902 */ 1903 static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align, 1904 uintptr_t brk) 1905 { 1906 /* TODO: come up with a better estimate of how much to skip. */ 1907 uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB; 1908 1909 for (uintptr_t base = skip; ; base += skip) { 1910 base = ROUND_UP(base, align); 1911 if (pgb_try_mmap_set(ga, base, brk)) { 1912 return base; 1913 } 1914 if (base >= -skip) { 1915 return -1; 1916 } 1917 } 1918 } 1919 1920 static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base, 1921 IntervalTreeRoot *root) 1922 { 1923 for (int i = ga->nbounds - 1; i >= 0; --i) { 1924 uintptr_t s = base + ga->bounds[i][0]; 1925 uintptr_t l = base + ga->bounds[i][1]; 1926 IntervalTreeNode *n; 1927 1928 if (l < s) { 1929 /* Wraparound. Skip to advance S to mmap_min_addr. */ 1930 return mmap_min_addr - s; 1931 } 1932 1933 n = interval_tree_iter_first(root, s, l); 1934 if (n != NULL) { 1935 /* Conflict. Skip to advance S to LAST + 1. */ 1936 return n->last - s + 1; 1937 } 1938 } 1939 return 0; /* success */ 1940 } 1941 1942 static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root, 1943 uintptr_t align, uintptr_t brk) 1944 { 1945 uintptr_t last = sizeof(uintptr_t) == 4 ? MiB : GiB; 1946 uintptr_t base, skip; 1947 1948 while (true) { 1949 base = ROUND_UP(last, align); 1950 if (base < last) { 1951 return -1; 1952 } 1953 1954 skip = pgb_try_itree(ga, base, root); 1955 if (skip == 0) { 1956 break; 1957 } 1958 1959 last = base + skip; 1960 if (last < base) { 1961 return -1; 1962 } 1963 } 1964 1965 /* 1966 * We've chosen 'base' based on holes in the interval tree, 1967 * but we don't yet know if it is a valid host address. 1968 * Because it is the first matching hole, if the host addresses 1969 * are invalid we know there are no further matches. 1970 */ 1971 return pgb_try_mmap_set(ga, base, brk) ? base : -1; 1972 } 1973 1974 static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr, 1975 uintptr_t guest_hiaddr, uintptr_t align) 1976 { 1977 IntervalTreeRoot *root; 1978 uintptr_t brk, ret; 1979 PGBAddrs ga; 1980 1981 /* Try the identity map first. */ 1982 if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) { 1983 brk = (uintptr_t)sbrk(0); 1984 if (pgb_try_mmap_set(&ga, 0, brk)) { 1985 guest_base = 0; 1986 return; 1987 } 1988 } 1989 1990 /* 1991 * Rebuild the address set for non-identity map. 1992 * This differs in the mapping of the guest NULL page. 1993 */ 1994 pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false); 1995 1996 root = read_self_maps(); 1997 1998 /* Read brk after we've read the maps, which will malloc. */ 1999 brk = (uintptr_t)sbrk(0); 2000 2001 if (!root) { 2002 ret = pgb_find_fallback(&ga, align, brk); 2003 } else { 2004 /* 2005 * Reserve the area close to the host brk. 2006 * This will be freed with the rest of the tree. 2007 */ 2008 IntervalTreeNode *b = g_new0(IntervalTreeNode, 1); 2009 b->start = brk; 2010 b->last = brk + 16 * MiB - 1; 2011 interval_tree_insert(b, root); 2012 2013 ret = pgb_find_itree(&ga, root, align, brk); 2014 free_self_maps(root); 2015 } 2016 2017 if (ret == -1) { 2018 int w = TARGET_LONG_BITS / 4; 2019 2020 error_report("%s: Unable to find a guest_base to satisfy all " 2021 "guest address mapping requirements", image_name); 2022 2023 for (int i = 0; i < ga.nbounds; ++i) { 2024 error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n", 2025 w, (uint64_t)ga.bounds[i][0], 2026 w, (uint64_t)ga.bounds[i][1]); 2027 } 2028 exit(EXIT_FAILURE); 2029 } 2030 guest_base = ret; 2031 } 2032 2033 void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, 2034 abi_ulong guest_hiaddr) 2035 { 2036 /* In order to use host shmat, we must be able to honor SHMLBA. */ 2037 uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE); 2038 2039 /* Sanity check the guest binary. */ 2040 if (reserved_va) { 2041 if (guest_hiaddr > reserved_va) { 2042 error_report("%s: requires more than reserved virtual " 2043 "address space (0x%" PRIx64 " > 0x%lx)", 2044 image_name, (uint64_t)guest_hiaddr, reserved_va); 2045 exit(EXIT_FAILURE); 2046 } 2047 } else { 2048 if (guest_hiaddr != (uintptr_t)guest_hiaddr) { 2049 error_report("%s: requires more virtual address space " 2050 "than the host can provide (0x%" PRIx64 ")", 2051 image_name, (uint64_t)guest_hiaddr + 1); 2052 exit(EXIT_FAILURE); 2053 } 2054 } 2055 2056 if (have_guest_base) { 2057 pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align); 2058 } else { 2059 pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align); 2060 } 2061 2062 /* Reserve and initialize the commpage. */ 2063 if (!init_guest_commpage()) { 2064 /* We have already probed for the commpage being free. */ 2065 g_assert_not_reached(); 2066 } 2067 2068 assert(QEMU_IS_ALIGNED(guest_base, align)); 2069 qemu_log_mask(CPU_LOG_PAGE, "Locating guest address space " 2070 "@ 0x%" PRIx64 "\n", (uint64_t)guest_base); 2071 } 2072 2073 enum { 2074 /* The string "GNU\0" as a magic number. */ 2075 GNU0_MAGIC = const_le32('G' | 'N' << 8 | 'U' << 16), 2076 NOTE_DATA_SZ = 1 * KiB, 2077 NOTE_NAME_SZ = 4, 2078 ELF_GNU_PROPERTY_ALIGN = ELF_CLASS == ELFCLASS32 ? 4 : 8, 2079 }; 2080 2081 /* 2082 * Process a single gnu_property entry. 2083 * Return false for error. 2084 */ 2085 static bool parse_elf_property(const uint32_t *data, int *off, int datasz, 2086 struct image_info *info, bool have_prev_type, 2087 uint32_t *prev_type, Error **errp) 2088 { 2089 uint32_t pr_type, pr_datasz, step; 2090 2091 if (*off > datasz || !QEMU_IS_ALIGNED(*off, ELF_GNU_PROPERTY_ALIGN)) { 2092 goto error_data; 2093 } 2094 datasz -= *off; 2095 data += *off / sizeof(uint32_t); 2096 2097 if (datasz < 2 * sizeof(uint32_t)) { 2098 goto error_data; 2099 } 2100 pr_type = data[0]; 2101 pr_datasz = data[1]; 2102 data += 2; 2103 datasz -= 2 * sizeof(uint32_t); 2104 step = ROUND_UP(pr_datasz, ELF_GNU_PROPERTY_ALIGN); 2105 if (step > datasz) { 2106 goto error_data; 2107 } 2108 2109 /* Properties are supposed to be unique and sorted on pr_type. */ 2110 if (have_prev_type && pr_type <= *prev_type) { 2111 if (pr_type == *prev_type) { 2112 error_setg(errp, "Duplicate property in PT_GNU_PROPERTY"); 2113 } else { 2114 error_setg(errp, "Unsorted property in PT_GNU_PROPERTY"); 2115 } 2116 return false; 2117 } 2118 *prev_type = pr_type; 2119 2120 if (!arch_parse_elf_property(pr_type, pr_datasz, data, info, errp)) { 2121 return false; 2122 } 2123 2124 *off += 2 * sizeof(uint32_t) + step; 2125 return true; 2126 2127 error_data: 2128 error_setg(errp, "Ill-formed property in PT_GNU_PROPERTY"); 2129 return false; 2130 } 2131 2132 /* Process NT_GNU_PROPERTY_TYPE_0. */ 2133 static bool parse_elf_properties(const ImageSource *src, 2134 struct image_info *info, 2135 const struct elf_phdr *phdr, 2136 Error **errp) 2137 { 2138 union { 2139 struct elf_note nhdr; 2140 uint32_t data[NOTE_DATA_SZ / sizeof(uint32_t)]; 2141 } note; 2142 2143 int n, off, datasz; 2144 bool have_prev_type; 2145 uint32_t prev_type; 2146 2147 /* Unless the arch requires properties, ignore them. */ 2148 if (!ARCH_USE_GNU_PROPERTY) { 2149 return true; 2150 } 2151 2152 /* If the properties are crazy large, that's too bad. */ 2153 n = phdr->p_filesz; 2154 if (n > sizeof(note)) { 2155 error_setg(errp, "PT_GNU_PROPERTY too large"); 2156 return false; 2157 } 2158 if (n < sizeof(note.nhdr)) { 2159 error_setg(errp, "PT_GNU_PROPERTY too small"); 2160 return false; 2161 } 2162 2163 if (!imgsrc_read(¬e, phdr->p_offset, n, src, errp)) { 2164 return false; 2165 } 2166 2167 /* 2168 * The contents of a valid PT_GNU_PROPERTY is a sequence of uint32_t. 2169 * Swap most of them now, beyond the header and namesz. 2170 */ 2171 if (target_needs_bswap()) { 2172 for (int i = 4; i < n / 4; i++) { 2173 bswap32s(note.data + i); 2174 } 2175 } 2176 2177 /* 2178 * Note that nhdr is 3 words, and that the "name" described by namesz 2179 * immediately follows nhdr and is thus at the 4th word. Further, all 2180 * of the inputs to the kernel's round_up are multiples of 4. 2181 */ 2182 if (tswap32(note.nhdr.n_type) != NT_GNU_PROPERTY_TYPE_0 || 2183 tswap32(note.nhdr.n_namesz) != NOTE_NAME_SZ || 2184 note.data[3] != GNU0_MAGIC) { 2185 error_setg(errp, "Invalid note in PT_GNU_PROPERTY"); 2186 return false; 2187 } 2188 off = sizeof(note.nhdr) + NOTE_NAME_SZ; 2189 2190 datasz = tswap32(note.nhdr.n_descsz) + off; 2191 if (datasz > n) { 2192 error_setg(errp, "Invalid note size in PT_GNU_PROPERTY"); 2193 return false; 2194 } 2195 2196 have_prev_type = false; 2197 prev_type = 0; 2198 while (1) { 2199 if (off == datasz) { 2200 return true; /* end, exit ok */ 2201 } 2202 if (!parse_elf_property(note.data, &off, datasz, info, 2203 have_prev_type, &prev_type, errp)) { 2204 return false; 2205 } 2206 have_prev_type = true; 2207 } 2208 } 2209 2210 /** 2211 * load_elf_image: Load an ELF image into the address space. 2212 * @image_name: the filename of the image, to use in error messages. 2213 * @src: the ImageSource from which to read. 2214 * @info: info collected from the loaded image. 2215 * @ehdr: the ELF header, not yet bswapped. 2216 * @pinterp_name: record any PT_INTERP string found. 2217 * 2218 * On return: @info values will be filled in, as necessary or available. 2219 */ 2220 2221 static void load_elf_image(const char *image_name, const ImageSource *src, 2222 struct image_info *info, struct elfhdr *ehdr, 2223 char **pinterp_name) 2224 { 2225 g_autofree struct elf_phdr *phdr = NULL; 2226 abi_ulong load_addr, load_bias, loaddr, hiaddr, error, align; 2227 size_t reserve_size, align_size; 2228 int i, prot_exec; 2229 Error *err = NULL; 2230 2231 /* 2232 * First of all, some simple consistency checks. 2233 * Note that we rely on the bswapped ehdr staying in bprm_buf, 2234 * for later use by load_elf_binary and create_elf_tables. 2235 */ 2236 if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) { 2237 goto exit_errmsg; 2238 } 2239 if (!elf_check_ident(ehdr)) { 2240 error_setg(&err, "Invalid ELF image for this architecture"); 2241 goto exit_errmsg; 2242 } 2243 bswap_ehdr(ehdr); 2244 if (!elf_check_ehdr(ehdr)) { 2245 error_setg(&err, "Invalid ELF image for this architecture"); 2246 goto exit_errmsg; 2247 } 2248 2249 phdr = imgsrc_read_alloc(ehdr->e_phoff, 2250 ehdr->e_phnum * sizeof(struct elf_phdr), 2251 src, &err); 2252 if (phdr == NULL) { 2253 goto exit_errmsg; 2254 } 2255 bswap_phdr(phdr, ehdr->e_phnum); 2256 2257 info->nsegs = 0; 2258 info->pt_dynamic_addr = 0; 2259 2260 mmap_lock(); 2261 2262 /* 2263 * Find the maximum size of the image and allocate an appropriate 2264 * amount of memory to handle that. Locate the interpreter, if any. 2265 */ 2266 loaddr = -1, hiaddr = 0; 2267 align = 0; 2268 info->exec_stack = EXSTACK_DEFAULT; 2269 for (i = 0; i < ehdr->e_phnum; ++i) { 2270 struct elf_phdr *eppnt = phdr + i; 2271 if (eppnt->p_type == PT_LOAD) { 2272 abi_ulong a = eppnt->p_vaddr & TARGET_PAGE_MASK; 2273 if (a < loaddr) { 2274 loaddr = a; 2275 } 2276 a = eppnt->p_vaddr + eppnt->p_memsz - 1; 2277 if (a > hiaddr) { 2278 hiaddr = a; 2279 } 2280 ++info->nsegs; 2281 align |= eppnt->p_align; 2282 } else if (eppnt->p_type == PT_INTERP && pinterp_name) { 2283 g_autofree char *interp_name = NULL; 2284 2285 if (*pinterp_name) { 2286 error_setg(&err, "Multiple PT_INTERP entries"); 2287 goto exit_errmsg; 2288 } 2289 2290 interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz, 2291 src, &err); 2292 if (interp_name == NULL) { 2293 goto exit_errmsg; 2294 } 2295 if (interp_name[eppnt->p_filesz - 1] != 0) { 2296 error_setg(&err, "Invalid PT_INTERP entry"); 2297 goto exit_errmsg; 2298 } 2299 *pinterp_name = g_steal_pointer(&interp_name); 2300 } else if (eppnt->p_type == PT_GNU_PROPERTY) { 2301 if (!parse_elf_properties(src, info, eppnt, &err)) { 2302 goto exit_errmsg; 2303 } 2304 } else if (eppnt->p_type == PT_GNU_STACK) { 2305 info->exec_stack = eppnt->p_flags & PF_X; 2306 } 2307 } 2308 2309 load_addr = loaddr; 2310 2311 align = pow2ceil(align); 2312 2313 if (pinterp_name != NULL) { 2314 if (ehdr->e_type == ET_EXEC) { 2315 /* 2316 * Make sure that the low address does not conflict with 2317 * MMAP_MIN_ADDR or the QEMU application itself. 2318 */ 2319 probe_guest_base(image_name, loaddr, hiaddr); 2320 } else { 2321 /* 2322 * The binary is dynamic, but we still need to 2323 * select guest_base. In this case we pass a size. 2324 */ 2325 probe_guest_base(image_name, 0, hiaddr - loaddr); 2326 2327 /* 2328 * Avoid collision with the loader by providing a different 2329 * default load address. 2330 */ 2331 load_addr += elf_et_dyn_base; 2332 2333 /* 2334 * TODO: Better support for mmap alignment is desirable. 2335 * Since we do not have complete control over the guest 2336 * address space, we prefer the kernel to choose some address 2337 * rather than force the use of LOAD_ADDR via MAP_FIXED. 2338 */ 2339 if (align) { 2340 load_addr &= -align; 2341 } 2342 } 2343 } 2344 2345 /* 2346 * Reserve address space for all of this. 2347 * 2348 * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get 2349 * exactly the address range that is required. Without reserved_va, 2350 * the guest address space is not isolated. We have attempted to avoid 2351 * conflict with the host program itself via probe_guest_base, but using 2352 * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check. 2353 * 2354 * Otherwise this is ET_DYN, and we are searching for a location 2355 * that can hold the memory space required. If the image is 2356 * pre-linked, LOAD_ADDR will be non-zero, and the kernel should 2357 * honor that address if it happens to be free. 2358 * 2359 * In both cases, we will overwrite pages in this range with mappings 2360 * from the executable. 2361 */ 2362 reserve_size = (size_t)hiaddr - loaddr + 1; 2363 align_size = reserve_size; 2364 2365 if (ehdr->e_type != ET_EXEC && align > qemu_real_host_page_size()) { 2366 align_size += align - 1; 2367 } 2368 2369 load_addr = target_mmap(load_addr, align_size, PROT_NONE, 2370 MAP_PRIVATE | MAP_ANON | MAP_NORESERVE | 2371 (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0), 2372 -1, 0); 2373 if (load_addr == -1) { 2374 goto exit_mmap; 2375 } 2376 2377 if (align_size != reserve_size) { 2378 abi_ulong align_addr = ROUND_UP(load_addr, align); 2379 abi_ulong align_end = TARGET_PAGE_ALIGN(align_addr + reserve_size); 2380 abi_ulong load_end = TARGET_PAGE_ALIGN(load_addr + align_size); 2381 2382 if (align_addr != load_addr) { 2383 target_munmap(load_addr, align_addr - load_addr); 2384 } 2385 if (align_end != load_end) { 2386 target_munmap(align_end, load_end - align_end); 2387 } 2388 load_addr = align_addr; 2389 } 2390 2391 load_bias = load_addr - loaddr; 2392 2393 if (elf_is_fdpic(ehdr)) { 2394 struct elf32_fdpic_loadseg *loadsegs = info->loadsegs = 2395 g_malloc(sizeof(*loadsegs) * info->nsegs); 2396 2397 for (i = 0; i < ehdr->e_phnum; ++i) { 2398 switch (phdr[i].p_type) { 2399 case PT_DYNAMIC: 2400 info->pt_dynamic_addr = phdr[i].p_vaddr + load_bias; 2401 break; 2402 case PT_LOAD: 2403 loadsegs->addr = phdr[i].p_vaddr + load_bias; 2404 loadsegs->p_vaddr = phdr[i].p_vaddr; 2405 loadsegs->p_memsz = phdr[i].p_memsz; 2406 ++loadsegs; 2407 break; 2408 } 2409 } 2410 } 2411 2412 info->load_bias = load_bias; 2413 info->code_offset = load_bias; 2414 info->data_offset = load_bias; 2415 info->load_addr = load_addr; 2416 info->entry = ehdr->e_entry + load_bias; 2417 info->start_code = -1; 2418 info->end_code = 0; 2419 info->start_data = -1; 2420 info->end_data = 0; 2421 /* Usual start for brk is after all sections of the main executable. */ 2422 info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias); 2423 info->elf_flags = ehdr->e_flags; 2424 2425 prot_exec = PROT_EXEC; 2426 #ifdef TARGET_AARCH64 2427 /* 2428 * If the BTI feature is present, this indicates that the executable 2429 * pages of the startup binary should be mapped with PROT_BTI, so that 2430 * branch targets are enforced. 2431 * 2432 * The startup binary is either the interpreter or the static executable. 2433 * The interpreter is responsible for all pages of a dynamic executable. 2434 * 2435 * Elf notes are backward compatible to older cpus. 2436 * Do not enable BTI unless it is supported. 2437 */ 2438 if ((info->note_flags & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) 2439 && (pinterp_name == NULL || *pinterp_name == 0) 2440 && cpu_isar_feature(aa64_bti, ARM_CPU(thread_cpu))) { 2441 prot_exec |= TARGET_PROT_BTI; 2442 } 2443 #endif 2444 2445 for (i = 0; i < ehdr->e_phnum; i++) { 2446 struct elf_phdr *eppnt = phdr + i; 2447 if (eppnt->p_type == PT_LOAD) { 2448 abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em; 2449 int elf_prot = 0; 2450 2451 if (eppnt->p_flags & PF_R) { 2452 elf_prot |= PROT_READ; 2453 } 2454 if (eppnt->p_flags & PF_W) { 2455 elf_prot |= PROT_WRITE; 2456 } 2457 if (eppnt->p_flags & PF_X) { 2458 elf_prot |= prot_exec; 2459 } 2460 2461 vaddr = load_bias + eppnt->p_vaddr; 2462 vaddr_po = vaddr & ~TARGET_PAGE_MASK; 2463 vaddr_ps = vaddr & TARGET_PAGE_MASK; 2464 2465 vaddr_ef = vaddr + eppnt->p_filesz; 2466 vaddr_em = vaddr + eppnt->p_memsz; 2467 2468 /* 2469 * Some segments may be completely empty, with a non-zero p_memsz 2470 * but no backing file segment. 2471 */ 2472 if (eppnt->p_filesz != 0) { 2473 error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, 2474 elf_prot, MAP_PRIVATE | MAP_FIXED, 2475 src, eppnt->p_offset - vaddr_po); 2476 if (error == -1) { 2477 goto exit_mmap; 2478 } 2479 } 2480 2481 /* If the load segment requests extra zeros (e.g. bss), map it. */ 2482 if (vaddr_ef < vaddr_em && 2483 !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) { 2484 goto exit_errmsg; 2485 } 2486 2487 /* Find the full program boundaries. */ 2488 if (elf_prot & PROT_EXEC) { 2489 if (vaddr < info->start_code) { 2490 info->start_code = vaddr; 2491 } 2492 if (vaddr_ef > info->end_code) { 2493 info->end_code = vaddr_ef; 2494 } 2495 } 2496 if (elf_prot & PROT_WRITE) { 2497 if (vaddr < info->start_data) { 2498 info->start_data = vaddr; 2499 } 2500 if (vaddr_ef > info->end_data) { 2501 info->end_data = vaddr_ef; 2502 } 2503 } 2504 #ifdef TARGET_MIPS 2505 } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) { 2506 Mips_elf_abiflags_v0 abiflags; 2507 2508 if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags), 2509 src, &err)) { 2510 goto exit_errmsg; 2511 } 2512 bswap_mips_abiflags(&abiflags); 2513 info->fp_abi = abiflags.fp_abi; 2514 #endif 2515 } 2516 } 2517 2518 if (info->end_data == 0) { 2519 info->start_data = info->end_code; 2520 info->end_data = info->end_code; 2521 } 2522 2523 if (qemu_log_enabled()) { 2524 load_symbols(ehdr, src, load_bias); 2525 } 2526 2527 debuginfo_report_elf(image_name, src->fd, load_bias); 2528 2529 mmap_unlock(); 2530 2531 close(src->fd); 2532 return; 2533 2534 exit_mmap: 2535 error_setg_errno(&err, errno, "Error mapping file"); 2536 goto exit_errmsg; 2537 exit_errmsg: 2538 error_reportf_err(err, "%s: ", image_name); 2539 exit(-1); 2540 } 2541 2542 static void load_elf_interp(const char *filename, struct image_info *info, 2543 char bprm_buf[BPRM_BUF_SIZE]) 2544 { 2545 struct elfhdr ehdr; 2546 ImageSource src; 2547 int fd, retval; 2548 Error *err = NULL; 2549 2550 fd = open(path(filename), O_RDONLY); 2551 if (fd < 0) { 2552 error_setg_file_open(&err, errno, filename); 2553 error_report_err(err); 2554 exit(-1); 2555 } 2556 2557 retval = read(fd, bprm_buf, BPRM_BUF_SIZE); 2558 if (retval < 0) { 2559 error_setg_errno(&err, errno, "Error reading file header"); 2560 error_reportf_err(err, "%s: ", filename); 2561 exit(-1); 2562 } 2563 2564 src.fd = fd; 2565 src.cache = bprm_buf; 2566 src.cache_size = retval; 2567 2568 load_elf_image(filename, &src, info, &ehdr, NULL); 2569 } 2570 2571 #ifndef vdso_image_info 2572 #ifdef VDSO_HEADER 2573 #include VDSO_HEADER 2574 #define vdso_image_info(flags) &vdso_image_info 2575 #else 2576 #define vdso_image_info(flags) NULL 2577 #endif /* VDSO_HEADER */ 2578 #endif /* vdso_image_info */ 2579 2580 static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso) 2581 { 2582 ImageSource src; 2583 struct elfhdr ehdr; 2584 abi_ulong load_bias, load_addr; 2585 2586 src.fd = -1; 2587 src.cache = vdso->image; 2588 src.cache_size = vdso->image_size; 2589 2590 load_elf_image("<internal-vdso>", &src, info, &ehdr, NULL); 2591 load_addr = info->load_addr; 2592 load_bias = info->load_bias; 2593 2594 /* 2595 * We need to relocate the VDSO image. The one built into the kernel 2596 * is built for a fixed address. The one built for QEMU is not, since 2597 * that requires close control of the guest address space. 2598 * We pre-processed the image to locate all of the addresses that need 2599 * to be updated. 2600 */ 2601 for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) { 2602 abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]); 2603 *addr = tswapal(tswapal(*addr) + load_bias); 2604 } 2605 2606 /* Install signal trampolines, if present. */ 2607 if (vdso->sigreturn_ofs) { 2608 default_sigreturn = load_addr + vdso->sigreturn_ofs; 2609 } 2610 if (vdso->rt_sigreturn_ofs) { 2611 default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs; 2612 } 2613 2614 /* Remove write from VDSO segment. */ 2615 target_mprotect(info->start_data, info->end_data - info->start_data, 2616 PROT_READ | PROT_EXEC); 2617 } 2618 2619 static int symfind(const void *s0, const void *s1) 2620 { 2621 struct elf_sym *sym = (struct elf_sym *)s1; 2622 __typeof(sym->st_value) addr = *(uint64_t *)s0; 2623 int result = 0; 2624 2625 if (addr < sym->st_value) { 2626 result = -1; 2627 } else if (addr >= sym->st_value + sym->st_size) { 2628 result = 1; 2629 } 2630 return result; 2631 } 2632 2633 static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) 2634 { 2635 #if ELF_CLASS == ELFCLASS32 2636 struct elf_sym *syms = s->disas_symtab.elf32; 2637 #else 2638 struct elf_sym *syms = s->disas_symtab.elf64; 2639 #endif 2640 2641 // binary search 2642 struct elf_sym *sym; 2643 2644 sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), symfind); 2645 if (sym != NULL) { 2646 return s->disas_strtab + sym->st_name; 2647 } 2648 2649 return ""; 2650 } 2651 2652 /* FIXME: This should use elf_ops.h.inc */ 2653 static int symcmp(const void *s0, const void *s1) 2654 { 2655 struct elf_sym *sym0 = (struct elf_sym *)s0; 2656 struct elf_sym *sym1 = (struct elf_sym *)s1; 2657 return (sym0->st_value < sym1->st_value) 2658 ? -1 2659 : ((sym0->st_value > sym1->st_value) ? 1 : 0); 2660 } 2661 2662 /* Best attempt to load symbols from this ELF object. */ 2663 static void load_symbols(struct elfhdr *hdr, const ImageSource *src, 2664 abi_ulong load_bias) 2665 { 2666 int i, shnum, nsyms, sym_idx = 0, str_idx = 0; 2667 g_autofree struct elf_shdr *shdr = NULL; 2668 char *strings = NULL; 2669 struct elf_sym *syms = NULL; 2670 struct elf_sym *new_syms; 2671 uint64_t segsz; 2672 2673 shnum = hdr->e_shnum; 2674 shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr), 2675 src, NULL); 2676 if (shdr == NULL) { 2677 return; 2678 } 2679 2680 bswap_shdr(shdr, shnum); 2681 for (i = 0; i < shnum; ++i) { 2682 if (shdr[i].sh_type == SHT_SYMTAB) { 2683 sym_idx = i; 2684 str_idx = shdr[i].sh_link; 2685 goto found; 2686 } 2687 } 2688 2689 /* There will be no symbol table if the file was stripped. */ 2690 return; 2691 2692 found: 2693 /* Now know where the strtab and symtab are. Snarf them. */ 2694 2695 segsz = shdr[str_idx].sh_size; 2696 strings = g_try_malloc(segsz); 2697 if (!strings) { 2698 goto give_up; 2699 } 2700 if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) { 2701 goto give_up; 2702 } 2703 2704 segsz = shdr[sym_idx].sh_size; 2705 if (segsz / sizeof(struct elf_sym) > INT_MAX) { 2706 /* 2707 * Implausibly large symbol table: give up rather than ploughing 2708 * on with the number of symbols calculation overflowing. 2709 */ 2710 goto give_up; 2711 } 2712 nsyms = segsz / sizeof(struct elf_sym); 2713 syms = g_try_malloc(segsz); 2714 if (!syms) { 2715 goto give_up; 2716 } 2717 if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) { 2718 goto give_up; 2719 } 2720 2721 for (i = 0; i < nsyms; ) { 2722 bswap_sym(syms + i); 2723 /* Throw away entries which we do not need. */ 2724 if (syms[i].st_shndx == SHN_UNDEF 2725 || syms[i].st_shndx >= SHN_LORESERVE 2726 || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) { 2727 if (i < --nsyms) { 2728 syms[i] = syms[nsyms]; 2729 } 2730 } else { 2731 #if defined(TARGET_ARM) || defined (TARGET_MIPS) 2732 /* The bottom address bit marks a Thumb or MIPS16 symbol. */ 2733 syms[i].st_value &= ~(target_ulong)1; 2734 #endif 2735 syms[i].st_value += load_bias; 2736 i++; 2737 } 2738 } 2739 2740 /* No "useful" symbol. */ 2741 if (nsyms == 0) { 2742 goto give_up; 2743 } 2744 2745 /* 2746 * Attempt to free the storage associated with the local symbols 2747 * that we threw away. Whether or not this has any effect on the 2748 * memory allocation depends on the malloc implementation and how 2749 * many symbols we managed to discard. 2750 */ 2751 new_syms = g_try_renew(struct elf_sym, syms, nsyms); 2752 if (new_syms == NULL) { 2753 goto give_up; 2754 } 2755 syms = new_syms; 2756 2757 qsort(syms, nsyms, sizeof(*syms), symcmp); 2758 2759 { 2760 struct syminfo *s = g_new(struct syminfo, 1); 2761 2762 s->disas_strtab = strings; 2763 s->disas_num_syms = nsyms; 2764 #if ELF_CLASS == ELFCLASS32 2765 s->disas_symtab.elf32 = syms; 2766 #else 2767 s->disas_symtab.elf64 = syms; 2768 #endif 2769 s->lookup_symbol = lookup_symbolxx; 2770 s->next = syminfos; 2771 syminfos = s; 2772 } 2773 return; 2774 2775 give_up: 2776 g_free(strings); 2777 g_free(syms); 2778 } 2779 2780 uint32_t get_elf_eflags(int fd) 2781 { 2782 struct elfhdr ehdr; 2783 off_t offset; 2784 int ret; 2785 2786 /* Read ELF header */ 2787 offset = lseek(fd, 0, SEEK_SET); 2788 if (offset == (off_t) -1) { 2789 return 0; 2790 } 2791 ret = read(fd, &ehdr, sizeof(ehdr)); 2792 if (ret < sizeof(ehdr)) { 2793 return 0; 2794 } 2795 offset = lseek(fd, offset, SEEK_SET); 2796 if (offset == (off_t) -1) { 2797 return 0; 2798 } 2799 2800 /* Check ELF signature */ 2801 if (!elf_check_ident(&ehdr)) { 2802 return 0; 2803 } 2804 2805 /* check header */ 2806 bswap_ehdr(&ehdr); 2807 if (!elf_check_ehdr(&ehdr)) { 2808 return 0; 2809 } 2810 2811 /* return architecture id */ 2812 return ehdr.e_flags; 2813 } 2814 2815 int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) 2816 { 2817 /* 2818 * We need a copy of the elf header for passing to create_elf_tables. 2819 * We will have overwritten the original when we re-use bprm->buf 2820 * while loading the interpreter. Allocate the storage for this now 2821 * and let elf_load_image do any swapping that may be required. 2822 */ 2823 struct elfhdr ehdr; 2824 struct image_info interp_info, vdso_info; 2825 char *elf_interpreter = NULL; 2826 char *scratch; 2827 2828 memset(&interp_info, 0, sizeof(interp_info)); 2829 #ifdef TARGET_MIPS 2830 interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN; 2831 #endif 2832 2833 load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter); 2834 2835 /* Do this so that we can load the interpreter, if need be. We will 2836 change some of these later */ 2837 bprm->p = setup_arg_pages(bprm, info); 2838 2839 scratch = g_new0(char, TARGET_PAGE_SIZE); 2840 if (STACK_GROWS_DOWN) { 2841 bprm->p = copy_elf_strings(1, &bprm->filename, scratch, 2842 bprm->p, info->stack_limit); 2843 info->file_string = bprm->p; 2844 bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch, 2845 bprm->p, info->stack_limit); 2846 info->env_strings = bprm->p; 2847 bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch, 2848 bprm->p, info->stack_limit); 2849 info->arg_strings = bprm->p; 2850 } else { 2851 info->arg_strings = bprm->p; 2852 bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch, 2853 bprm->p, info->stack_limit); 2854 info->env_strings = bprm->p; 2855 bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch, 2856 bprm->p, info->stack_limit); 2857 info->file_string = bprm->p; 2858 bprm->p = copy_elf_strings(1, &bprm->filename, scratch, 2859 bprm->p, info->stack_limit); 2860 } 2861 2862 g_free(scratch); 2863 2864 if (!bprm->p) { 2865 fprintf(stderr, "%s: %s\n", bprm->filename, strerror(E2BIG)); 2866 exit(-1); 2867 } 2868 2869 if (elf_interpreter) { 2870 load_elf_interp(elf_interpreter, &interp_info, bprm->buf); 2871 2872 /* 2873 * While unusual because of ELF_ET_DYN_BASE, if we are unlucky 2874 * with the mappings the interpreter can be loaded above but 2875 * near the main executable, which can leave very little room 2876 * for the heap. 2877 * If the current brk has less than 16MB, use the end of the 2878 * interpreter. 2879 */ 2880 if (interp_info.brk > info->brk && 2881 interp_info.load_bias - info->brk < 16 * MiB) { 2882 info->brk = interp_info.brk; 2883 } 2884 2885 /* If the program interpreter is one of these two, then assume 2886 an iBCS2 image. Otherwise assume a native linux image. */ 2887 2888 if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 2889 || strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0) { 2890 info->personality = PER_SVR4; 2891 2892 /* Why this, you ask??? Well SVr4 maps page 0 as read-only, 2893 and some applications "depend" upon this behavior. Since 2894 we do not have the power to recompile these, we emulate 2895 the SVr4 behavior. Sigh. */ 2896 target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC, 2897 MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, 2898 -1, 0); 2899 } 2900 #ifdef TARGET_MIPS 2901 info->interp_fp_abi = interp_info.fp_abi; 2902 #endif 2903 } 2904 2905 /* 2906 * Load a vdso if available, which will amongst other things contain the 2907 * signal trampolines. Otherwise, allocate a separate page for them. 2908 */ 2909 const VdsoImageInfo *vdso = vdso_image_info(info->elf_flags); 2910 if (vdso) { 2911 load_elf_vdso(&vdso_info, vdso); 2912 info->vdso = vdso_info.load_bias; 2913 } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { 2914 abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, 2915 PROT_READ | PROT_WRITE, 2916 MAP_PRIVATE | MAP_ANON, -1, 0); 2917 if (tramp_page == -1) { 2918 return -errno; 2919 } 2920 2921 setup_sigtramp(tramp_page); 2922 target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); 2923 } 2924 2925 bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info, 2926 elf_interpreter ? &interp_info : NULL, 2927 vdso ? &vdso_info : NULL); 2928 info->start_stack = bprm->p; 2929 2930 /* If we have an interpreter, set that as the program's entry point. 2931 Copy the load_bias as well, to help PPC64 interpret the entry 2932 point as a function descriptor. Do this after creating elf tables 2933 so that we copy the original program entry point into the AUXV. */ 2934 if (elf_interpreter) { 2935 info->load_bias = interp_info.load_bias; 2936 info->entry = interp_info.entry; 2937 g_free(elf_interpreter); 2938 } 2939 2940 #ifdef USE_ELF_CORE_DUMP 2941 bprm->core_dump = &elf_core_dump; 2942 #endif 2943 2944 return 0; 2945 } 2946 2947 #ifdef USE_ELF_CORE_DUMP 2948 2949 /* 2950 * Definitions to generate Intel SVR4-like core files. 2951 * These mostly have the same names as the SVR4 types with "target_elf_" 2952 * tacked on the front to prevent clashes with linux definitions, 2953 * and the typedef forms have been avoided. This is mostly like 2954 * the SVR4 structure, but more Linuxy, with things that Linux does 2955 * not support and which gdb doesn't really use excluded. 2956 * 2957 * Fields we don't dump (their contents is zero) in linux-user qemu 2958 * are marked with XXX. 2959 * 2960 * Core dump code is copied from linux kernel (fs/binfmt_elf.c). 2961 * 2962 * Porting ELF coredump for target is (quite) simple process. First you 2963 * define USE_ELF_CORE_DUMP in target ELF code (where init_thread() for 2964 * the target resides): 2965 * 2966 * #define USE_ELF_CORE_DUMP 2967 * 2968 * Next you define type of register set used for dumping. ELF specification 2969 * says that it needs to be array of elf_greg_t that has size of ELF_NREG. 2970 * 2971 * typedef <target_regtype> target_elf_greg_t; 2972 * #define ELF_NREG <number of registers> 2973 * typedef taret_elf_greg_t target_elf_gregset_t[ELF_NREG]; 2974 * 2975 * Last step is to implement target specific function that copies registers 2976 * from given cpu into just specified register set. Prototype is: 2977 * 2978 * static void elf_core_copy_regs(taret_elf_gregset_t *regs, 2979 * const CPUArchState *env); 2980 * 2981 * Parameters: 2982 * regs - copy register values into here (allocated and zeroed by caller) 2983 * env - copy registers from here 2984 * 2985 * Example for ARM target is provided in this file. 2986 */ 2987 2988 struct target_elf_siginfo { 2989 abi_int si_signo; /* signal number */ 2990 abi_int si_code; /* extra code */ 2991 abi_int si_errno; /* errno */ 2992 }; 2993 2994 struct target_elf_prstatus { 2995 struct target_elf_siginfo pr_info; /* Info associated with signal */ 2996 abi_short pr_cursig; /* Current signal */ 2997 abi_ulong pr_sigpend; /* XXX */ 2998 abi_ulong pr_sighold; /* XXX */ 2999 target_pid_t pr_pid; 3000 target_pid_t pr_ppid; 3001 target_pid_t pr_pgrp; 3002 target_pid_t pr_sid; 3003 struct target_timeval pr_utime; /* XXX User time */ 3004 struct target_timeval pr_stime; /* XXX System time */ 3005 struct target_timeval pr_cutime; /* XXX Cumulative user time */ 3006 struct target_timeval pr_cstime; /* XXX Cumulative system time */ 3007 target_elf_gregset_t pr_reg; /* GP registers */ 3008 abi_int pr_fpvalid; /* XXX */ 3009 }; 3010 3011 #define ELF_PRARGSZ (80) /* Number of chars for args */ 3012 3013 struct target_elf_prpsinfo { 3014 char pr_state; /* numeric process state */ 3015 char pr_sname; /* char for pr_state */ 3016 char pr_zomb; /* zombie */ 3017 char pr_nice; /* nice val */ 3018 abi_ulong pr_flag; /* flags */ 3019 target_uid_t pr_uid; 3020 target_gid_t pr_gid; 3021 target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; 3022 /* Lots missing */ 3023 char pr_fname[16] QEMU_NONSTRING; /* filename of executable */ 3024 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ 3025 }; 3026 3027 static void bswap_prstatus(struct target_elf_prstatus *prstatus) 3028 { 3029 if (!target_needs_bswap()) { 3030 return; 3031 } 3032 3033 prstatus->pr_info.si_signo = tswap32(prstatus->pr_info.si_signo); 3034 prstatus->pr_info.si_code = tswap32(prstatus->pr_info.si_code); 3035 prstatus->pr_info.si_errno = tswap32(prstatus->pr_info.si_errno); 3036 prstatus->pr_cursig = tswap16(prstatus->pr_cursig); 3037 prstatus->pr_sigpend = tswapal(prstatus->pr_sigpend); 3038 prstatus->pr_sighold = tswapal(prstatus->pr_sighold); 3039 prstatus->pr_pid = tswap32(prstatus->pr_pid); 3040 prstatus->pr_ppid = tswap32(prstatus->pr_ppid); 3041 prstatus->pr_pgrp = tswap32(prstatus->pr_pgrp); 3042 prstatus->pr_sid = tswap32(prstatus->pr_sid); 3043 /* cpu times are not filled, so we skip them */ 3044 /* regs should be in correct format already */ 3045 prstatus->pr_fpvalid = tswap32(prstatus->pr_fpvalid); 3046 } 3047 3048 static void bswap_psinfo(struct target_elf_prpsinfo *psinfo) 3049 { 3050 if (!target_needs_bswap()) { 3051 return; 3052 } 3053 3054 psinfo->pr_flag = tswapal(psinfo->pr_flag); 3055 psinfo->pr_uid = tswap16(psinfo->pr_uid); 3056 psinfo->pr_gid = tswap16(psinfo->pr_gid); 3057 psinfo->pr_pid = tswap32(psinfo->pr_pid); 3058 psinfo->pr_ppid = tswap32(psinfo->pr_ppid); 3059 psinfo->pr_pgrp = tswap32(psinfo->pr_pgrp); 3060 psinfo->pr_sid = tswap32(psinfo->pr_sid); 3061 } 3062 3063 static void bswap_note(struct elf_note *en) 3064 { 3065 if (!target_needs_bswap()) { 3066 return; 3067 } 3068 3069 bswap32s(&en->n_namesz); 3070 bswap32s(&en->n_descsz); 3071 bswap32s(&en->n_type); 3072 } 3073 3074 /* 3075 * Calculate file (dump) size of given memory region. 3076 */ 3077 static size_t vma_dump_size(vaddr start, vaddr end, int flags) 3078 { 3079 /* The area must be readable. */ 3080 if (!(flags & PAGE_READ)) { 3081 return 0; 3082 } 3083 3084 /* 3085 * Usually we don't dump executable pages as they contain 3086 * non-writable code that debugger can read directly from 3087 * target library etc. If there is no elf header, we dump it. 3088 */ 3089 if (!(flags & PAGE_WRITE_ORG) && 3090 (flags & PAGE_EXEC) && 3091 memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) { 3092 return 0; 3093 } 3094 3095 return end - start; 3096 } 3097 3098 static size_t size_note(const char *name, size_t datasz) 3099 { 3100 size_t namesz = strlen(name) + 1; 3101 3102 namesz = ROUND_UP(namesz, 4); 3103 datasz = ROUND_UP(datasz, 4); 3104 3105 return sizeof(struct elf_note) + namesz + datasz; 3106 } 3107 3108 static void *fill_note(void **pptr, int type, const char *name, size_t datasz) 3109 { 3110 void *ptr = *pptr; 3111 struct elf_note *n = ptr; 3112 size_t namesz = strlen(name) + 1; 3113 3114 n->n_namesz = namesz; 3115 n->n_descsz = datasz; 3116 n->n_type = type; 3117 bswap_note(n); 3118 3119 ptr += sizeof(*n); 3120 memcpy(ptr, name, namesz); 3121 3122 namesz = ROUND_UP(namesz, 4); 3123 datasz = ROUND_UP(datasz, 4); 3124 3125 *pptr = ptr + namesz + datasz; 3126 return ptr + namesz; 3127 } 3128 3129 static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, 3130 uint32_t flags) 3131 { 3132 memcpy(elf->e_ident, ELFMAG, SELFMAG); 3133 3134 elf->e_ident[EI_CLASS] = ELF_CLASS; 3135 elf->e_ident[EI_DATA] = ELF_DATA; 3136 elf->e_ident[EI_VERSION] = EV_CURRENT; 3137 elf->e_ident[EI_OSABI] = ELF_OSABI; 3138 3139 elf->e_type = ET_CORE; 3140 elf->e_machine = machine; 3141 elf->e_version = EV_CURRENT; 3142 elf->e_phoff = sizeof(struct elfhdr); 3143 elf->e_flags = flags; 3144 elf->e_ehsize = sizeof(struct elfhdr); 3145 elf->e_phentsize = sizeof(struct elf_phdr); 3146 elf->e_phnum = segs; 3147 3148 bswap_ehdr(elf); 3149 } 3150 3151 static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset) 3152 { 3153 phdr->p_type = PT_NOTE; 3154 phdr->p_offset = offset; 3155 phdr->p_filesz = sz; 3156 3157 bswap_phdr(phdr, 1); 3158 } 3159 3160 static void fill_prstatus_note(void *data, CPUState *cpu, int signr) 3161 { 3162 /* 3163 * Because note memory is only aligned to 4, and target_elf_prstatus 3164 * may well have higher alignment requirements, fill locally and 3165 * memcpy to the destination afterward. 3166 */ 3167 struct target_elf_prstatus prstatus = { 3168 .pr_info.si_signo = signr, 3169 .pr_cursig = signr, 3170 .pr_pid = get_task_state(cpu)->ts_tid, 3171 .pr_ppid = getppid(), 3172 .pr_pgrp = getpgrp(), 3173 .pr_sid = getsid(0), 3174 }; 3175 3176 elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu)); 3177 bswap_prstatus(&prstatus); 3178 memcpy(data, &prstatus, sizeof(prstatus)); 3179 } 3180 3181 static void fill_prpsinfo_note(void *data, const TaskState *ts) 3182 { 3183 /* 3184 * Because note memory is only aligned to 4, and target_elf_prpsinfo 3185 * may well have higher alignment requirements, fill locally and 3186 * memcpy to the destination afterward. 3187 */ 3188 struct target_elf_prpsinfo psinfo = { 3189 .pr_pid = getpid(), 3190 .pr_ppid = getppid(), 3191 .pr_pgrp = getpgrp(), 3192 .pr_sid = getsid(0), 3193 .pr_uid = getuid(), 3194 .pr_gid = getgid(), 3195 }; 3196 char *base_filename; 3197 size_t len; 3198 3199 len = ts->info->env_strings - ts->info->arg_strings; 3200 len = MIN(len, ELF_PRARGSZ); 3201 memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len); 3202 for (size_t i = 0; i < len; i++) { 3203 if (psinfo.pr_psargs[i] == 0) { 3204 psinfo.pr_psargs[i] = ' '; 3205 } 3206 } 3207 3208 base_filename = g_path_get_basename(ts->bprm->filename); 3209 /* 3210 * Using strncpy here is fine: at max-length, 3211 * this field is not NUL-terminated. 3212 */ 3213 strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname)); 3214 g_free(base_filename); 3215 3216 bswap_psinfo(&psinfo); 3217 memcpy(data, &psinfo, sizeof(psinfo)); 3218 } 3219 3220 static void fill_auxv_note(void *data, const TaskState *ts) 3221 { 3222 memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len); 3223 } 3224 3225 /* 3226 * Constructs name of coredump file. We have following convention 3227 * for the name: 3228 * qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core 3229 * 3230 * Returns the filename 3231 */ 3232 static char *core_dump_filename(const TaskState *ts) 3233 { 3234 g_autoptr(GDateTime) now = g_date_time_new_now_local(); 3235 g_autofree char *nowstr = g_date_time_format(now, "%Y%m%d-%H%M%S"); 3236 g_autofree char *base_filename = g_path_get_basename(ts->bprm->filename); 3237 3238 return g_strdup_printf("qemu_%s_%s_%d.core", 3239 base_filename, nowstr, (int)getpid()); 3240 } 3241 3242 static int dump_write(int fd, const void *ptr, size_t size) 3243 { 3244 const char *bufp = (const char *)ptr; 3245 ssize_t bytes_written, bytes_left; 3246 3247 bytes_written = 0; 3248 bytes_left = size; 3249 3250 /* 3251 * In normal conditions, single write(2) should do but 3252 * in case of socket etc. this mechanism is more portable. 3253 */ 3254 do { 3255 bytes_written = write(fd, bufp, bytes_left); 3256 if (bytes_written < 0) { 3257 if (errno == EINTR) 3258 continue; 3259 return (-1); 3260 } else if (bytes_written == 0) { /* eof */ 3261 return (-1); 3262 } 3263 bufp += bytes_written; 3264 bytes_left -= bytes_written; 3265 } while (bytes_left > 0); 3266 3267 return (0); 3268 } 3269 3270 static int wmr_page_unprotect_regions(void *opaque, vaddr start, 3271 vaddr end, int flags) 3272 { 3273 if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) { 3274 size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size()); 3275 3276 while (1) { 3277 page_unprotect(NULL, start, 0); 3278 if (end - start <= step) { 3279 break; 3280 } 3281 start += step; 3282 } 3283 } 3284 return 0; 3285 } 3286 3287 typedef struct { 3288 unsigned count; 3289 size_t size; 3290 } CountAndSizeRegions; 3291 3292 static int wmr_count_and_size_regions(void *opaque, vaddr start, 3293 vaddr end, int flags) 3294 { 3295 CountAndSizeRegions *css = opaque; 3296 3297 css->count++; 3298 css->size += vma_dump_size(start, end, flags); 3299 return 0; 3300 } 3301 3302 typedef struct { 3303 struct elf_phdr *phdr; 3304 off_t offset; 3305 } FillRegionPhdr; 3306 3307 static int wmr_fill_region_phdr(void *opaque, vaddr start, 3308 vaddr end, int flags) 3309 { 3310 FillRegionPhdr *d = opaque; 3311 struct elf_phdr *phdr = d->phdr; 3312 3313 phdr->p_type = PT_LOAD; 3314 phdr->p_vaddr = start; 3315 phdr->p_paddr = 0; 3316 phdr->p_filesz = vma_dump_size(start, end, flags); 3317 phdr->p_offset = d->offset; 3318 d->offset += phdr->p_filesz; 3319 phdr->p_memsz = end - start; 3320 phdr->p_flags = (flags & PAGE_READ ? PF_R : 0) 3321 | (flags & PAGE_WRITE_ORG ? PF_W : 0) 3322 | (flags & PAGE_EXEC ? PF_X : 0); 3323 phdr->p_align = ELF_EXEC_PAGESIZE; 3324 3325 bswap_phdr(phdr, 1); 3326 d->phdr = phdr + 1; 3327 return 0; 3328 } 3329 3330 static int wmr_write_region(void *opaque, vaddr start, 3331 vaddr end, int flags) 3332 { 3333 int fd = *(int *)opaque; 3334 size_t size = vma_dump_size(start, end, flags); 3335 3336 if (!size) { 3337 return 0; 3338 } 3339 return dump_write(fd, g2h_untagged(start), size); 3340 } 3341 3342 /* 3343 * Write out ELF coredump. 3344 * 3345 * See documentation of ELF object file format in: 3346 * http://www.caldera.com/developers/devspecs/gabi41.pdf 3347 * 3348 * Coredump format in linux is following: 3349 * 3350 * 0 +----------------------+ \ 3351 * | ELF header | ET_CORE | 3352 * +----------------------+ | 3353 * | ELF program headers | |--- headers 3354 * | - NOTE section | | 3355 * | - PT_LOAD sections | | 3356 * +----------------------+ / 3357 * | NOTEs: | 3358 * | - NT_PRSTATUS | 3359 * | - NT_PRSINFO | 3360 * | - NT_AUXV | 3361 * +----------------------+ <-- aligned to target page 3362 * | Process memory dump | 3363 * : : 3364 * . . 3365 * : : 3366 * | | 3367 * +----------------------+ 3368 * 3369 * NT_PRSTATUS -> struct elf_prstatus (per thread) 3370 * NT_PRSINFO -> struct elf_prpsinfo 3371 * NT_AUXV is array of { type, value } pairs (see fill_auxv_note()). 3372 * 3373 * Format follows System V format as close as possible. Current 3374 * version limitations are as follows: 3375 * - no floating point registers are dumped 3376 * 3377 * Function returns 0 in case of success, negative errno otherwise. 3378 * 3379 * TODO: make this work also during runtime: it should be 3380 * possible to force coredump from running process and then 3381 * continue processing. For example qemu could set up SIGUSR2 3382 * handler (provided that target process haven't registered 3383 * handler for that) that does the dump when signal is received. 3384 */ 3385 static int elf_core_dump(int signr, const CPUArchState *env) 3386 { 3387 const CPUState *cpu = env_cpu_const(env); 3388 const TaskState *ts = (const TaskState *)get_task_state((CPUState *)cpu); 3389 struct rlimit dumpsize; 3390 CountAndSizeRegions css; 3391 off_t offset, note_offset, data_offset; 3392 size_t note_size; 3393 int cpus, ret; 3394 int fd = -1; 3395 CPUState *cpu_iter; 3396 3397 if (prctl(PR_GET_DUMPABLE) == 0) { 3398 return 0; 3399 } 3400 3401 if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) { 3402 return 0; 3403 } 3404 3405 cpu_list_lock(); 3406 mmap_lock(); 3407 3408 /* By unprotecting, we merge vmas that might be split. */ 3409 walk_memory_regions(NULL, wmr_page_unprotect_regions); 3410 3411 /* 3412 * Walk through target process memory mappings and 3413 * set up structure containing this information. 3414 */ 3415 memset(&css, 0, sizeof(css)); 3416 walk_memory_regions(&css, wmr_count_and_size_regions); 3417 3418 cpus = 0; 3419 CPU_FOREACH(cpu_iter) { 3420 cpus++; 3421 } 3422 3423 offset = sizeof(struct elfhdr); 3424 offset += (css.count + 1) * sizeof(struct elf_phdr); 3425 note_offset = offset; 3426 3427 offset += size_note("CORE", ts->info->auxv_len); 3428 offset += size_note("CORE", sizeof(struct target_elf_prpsinfo)); 3429 offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus; 3430 note_size = offset - note_offset; 3431 data_offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); 3432 3433 /* Do not dump if the corefile size exceeds the limit. */ 3434 if (dumpsize.rlim_cur != RLIM_INFINITY 3435 && dumpsize.rlim_cur < data_offset + css.size) { 3436 errno = 0; 3437 goto out; 3438 } 3439 3440 { 3441 g_autofree char *corefile = core_dump_filename(ts); 3442 fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 3443 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 3444 } 3445 if (fd < 0) { 3446 goto out; 3447 } 3448 3449 /* 3450 * There is a fair amount of alignment padding within the notes 3451 * as well as preceeding the process memory. Allocate a zeroed 3452 * block to hold it all. Write all of the headers directly into 3453 * this buffer and then write it out as a block. 3454 */ 3455 { 3456 g_autofree void *header = g_malloc0(data_offset); 3457 FillRegionPhdr frp; 3458 void *hptr, *dptr; 3459 3460 /* Create elf file header. */ 3461 hptr = header; 3462 fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0); 3463 hptr += sizeof(struct elfhdr); 3464 3465 /* Create elf program headers. */ 3466 fill_elf_note_phdr(hptr, note_size, note_offset); 3467 hptr += sizeof(struct elf_phdr); 3468 3469 frp.phdr = hptr; 3470 frp.offset = data_offset; 3471 walk_memory_regions(&frp, wmr_fill_region_phdr); 3472 hptr = frp.phdr; 3473 3474 /* Create the notes. */ 3475 dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len); 3476 fill_auxv_note(dptr, ts); 3477 3478 dptr = fill_note(&hptr, NT_PRPSINFO, "CORE", 3479 sizeof(struct target_elf_prpsinfo)); 3480 fill_prpsinfo_note(dptr, ts); 3481 3482 CPU_FOREACH(cpu_iter) { 3483 dptr = fill_note(&hptr, NT_PRSTATUS, "CORE", 3484 sizeof(struct target_elf_prstatus)); 3485 fill_prstatus_note(dptr, cpu_iter, cpu_iter == cpu ? signr : 0); 3486 } 3487 3488 if (dump_write(fd, header, data_offset) < 0) { 3489 goto out; 3490 } 3491 } 3492 3493 /* 3494 * Finally write process memory into the corefile as well. 3495 */ 3496 if (walk_memory_regions(&fd, wmr_write_region) < 0) { 3497 goto out; 3498 } 3499 errno = 0; 3500 3501 out: 3502 ret = -errno; 3503 mmap_unlock(); 3504 cpu_list_unlock(); 3505 if (fd >= 0) { 3506 close(fd); 3507 } 3508 return ret; 3509 } 3510 #endif /* USE_ELF_CORE_DUMP */ 3511 3512 void do_init_main_thread(CPUState *cs, struct image_info *infop) 3513 { 3514 #ifdef HAVE_INIT_MAIN_THREAD 3515 init_main_thread(cs, infop); 3516 #else 3517 target_pt_regs regs = { }; 3518 3519 init_thread(®s, infop); 3520 target_cpu_copy_regs(cpu_env(cs), ®s); 3521 #endif 3522 } 3523