1 /* This is the Linux kernel elf-loading code, ported into user space */
2 #include "qemu/osdep.h"
3 #include <sys/param.h>
4
5 #include <sys/prctl.h>
6 #include <sys/resource.h>
7 #include <sys/shm.h>
8
9 #include "qemu.h"
10 #include "user/tswap-target.h"
11 #include "user/page-protection.h"
12 #include "exec/page-protection.h"
13 #include "exec/mmap-lock.h"
14 #include "exec/translation-block.h"
15 #include "exec/tswap.h"
16 #include "user/guest-base.h"
17 #include "user-internals.h"
18 #include "signal-common.h"
19 #include "loader.h"
20 #include "user-mmap.h"
21 #include "disas/disas.h"
22 #include "qemu/bitops.h"
23 #include "qemu/path.h"
24 #include "qemu/queue.h"
25 #include "qemu/guest-random.h"
26 #include "qemu/units.h"
27 #include "qemu/selfmap.h"
28 #include "qemu/lockable.h"
29 #include "qapi/error.h"
30 #include "qemu/error-report.h"
31 #include "target_elf.h"
32 #include "target_signal.h"
33 #include "tcg/debuginfo.h"
34
35 #ifdef TARGET_ARM
36 #include "target/arm/cpu-features.h"
37 #endif
38
39 #ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE
40 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
41 #endif
42
43 #define ELF_OSABI ELFOSABI_SYSV
44
45 /* from personality.h */
46
47 /*
48 * Flags for bug emulation.
49 *
50 * These occupy the top three bytes.
51 */
52 enum {
53 ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */
54 FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to
55 descriptors (signal handling) */
56 MMAP_PAGE_ZERO = 0x0100000,
57 ADDR_COMPAT_LAYOUT = 0x0200000,
58 READ_IMPLIES_EXEC = 0x0400000,
59 ADDR_LIMIT_32BIT = 0x0800000,
60 SHORT_INODE = 0x1000000,
61 WHOLE_SECONDS = 0x2000000,
62 STICKY_TIMEOUTS = 0x4000000,
63 ADDR_LIMIT_3GB = 0x8000000,
64 };
65
66 /*
67 * Personality types.
68 *
69 * These go in the low byte. Avoid using the top bit, it will
70 * conflict with error returns.
71 */
72 enum {
73 PER_LINUX = 0x0000,
74 PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
75 PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
76 PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
77 PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
78 PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE,
79 PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
80 PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
81 PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
82 PER_BSD = 0x0006,
83 PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
84 PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
85 PER_LINUX32 = 0x0008,
86 PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
87 PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
88 PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
89 PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
90 PER_RISCOS = 0x000c,
91 PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
92 PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
93 PER_OSF4 = 0x000f, /* OSF/1 v4 */
94 PER_HPUX = 0x0010,
95 PER_MASK = 0x00ff,
96 };
97
98 /*
99 * Return the base personality without flags.
100 */
101 #define personality(pers) (pers & PER_MASK)
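/* e.g. personality(PER_SVR4) == 0x0001: the bug-emulation flag bits in the
   upper bytes are masked off, leaving only the base personality type. */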
102
103 int info_is_fdpic(struct image_info *info)
104 {
105 return info->personality == PER_LINUX_FDPIC;
106 }
107
108 #if TARGET_BIG_ENDIAN
109 #define ELF_DATA ELFDATA2MSB
110 #else
111 #define ELF_DATA ELFDATA2LSB
112 #endif
113
114 #ifdef USE_UID16
115 typedef abi_ushort target_uid_t;
116 typedef abi_ushort target_gid_t;
117 #else
118 typedef abi_uint target_uid_t;
119 typedef abi_uint target_gid_t;
120 #endif
121 typedef abi_int target_pid_t;
122
123 #ifndef elf_check_machine
124 #define elf_check_machine(x) ((x) == ELF_MACHINE)
125 #endif
126
127 #ifndef elf_check_abi
128 #define elf_check_abi(x) (1)
129 #endif
130
131 #ifndef STACK_GROWS_DOWN
132 #define STACK_GROWS_DOWN 1
133 #endif
134
135 #ifndef STACK_ALIGNMENT
136 #define STACK_ALIGNMENT 16
137 #endif
138
139 #ifdef TARGET_ABI32
140 #undef ELF_CLASS
141 #define ELF_CLASS ELFCLASS32
142 #undef bswaptls
143 #define bswaptls(ptr) bswap32s(ptr)
144 #endif
145
146 #ifndef EXSTACK_DEFAULT
147 #define EXSTACK_DEFAULT false
148 #endif
149
150 /*
151 * Provide fallback definitions that the target may omit.
152 * One way or another, we'll get a link error if the setting of
153 * HAVE_* doesn't match the implementation.
154 */
155 #ifndef HAVE_ELF_HWCAP
156 abi_ulong get_elf_hwcap(CPUState *cs) { return 0; }
157 #endif
158 #ifndef HAVE_ELF_HWCAP2
159 abi_ulong get_elf_hwcap2(CPUState *cs) { g_assert_not_reached(); }
160 #define HAVE_ELF_HWCAP2 0
161 #endif
162 #ifndef HAVE_ELF_PLATFORM
163 const char *get_elf_platform(CPUState *cs) { return NULL; }
164 #endif
165 #ifndef HAVE_ELF_BASE_PLATFORM
166 const char *get_elf_base_platform(CPUState *cs) { return NULL; }
167 #endif
168
169 #ifndef HAVE_ELF_GNU_PROPERTY
170 bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz,
171 const uint32_t *data, struct image_info *info,
172 Error **errp)
173 {
174 g_assert_not_reached();
175 }
176 #define HAVE_ELF_GNU_PROPERTY 0
177 #endif
178
179 #include "elf.h"
180
181 #define DLINFO_ITEMS 16
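/* Number of unconditional NEW_AUX_ENT entries emitted by create_elf_tables();
   the terminating AT_NULL and the optional entries are accounted for
   separately when the auxv is sized. */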
182
183 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
184 {
185 memcpy(to, from, n);
186 }
187
188 static void bswap_ehdr(struct elfhdr *ehdr)
189 {
190 if (!target_needs_bswap()) {
191 return;
192 }
193
194 bswap16s(&ehdr->e_type); /* Object file type */
195 bswap16s(&ehdr->e_machine); /* Architecture */
196 bswap32s(&ehdr->e_version); /* Object file version */
197 bswaptls(&ehdr->e_entry); /* Entry point virtual address */
198 bswaptls(&ehdr->e_phoff); /* Program header table file offset */
199 bswaptls(&ehdr->e_shoff); /* Section header table file offset */
200 bswap32s(&ehdr->e_flags); /* Processor-specific flags */
201 bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */
202 bswap16s(&ehdr->e_phentsize); /* Program header table entry size */
203 bswap16s(&ehdr->e_phnum); /* Program header table entry count */
204 bswap16s(&ehdr->e_shentsize); /* Section header table entry size */
205 bswap16s(&ehdr->e_shnum); /* Section header table entry count */
206 bswap16s(&ehdr->e_shstrndx); /* Section header string table index */
207 }
208
209 static void bswap_phdr(struct elf_phdr *phdr, int phnum)
210 {
211 if (!target_needs_bswap()) {
212 return;
213 }
214
215 for (int i = 0; i < phnum; ++i, ++phdr) {
216 bswap32s(&phdr->p_type); /* Segment type */
217 bswap32s(&phdr->p_flags); /* Segment flags */
218 bswaptls(&phdr->p_offset); /* Segment file offset */
219 bswaptls(&phdr->p_vaddr); /* Segment virtual address */
220 bswaptls(&phdr->p_paddr); /* Segment physical address */
221 bswaptls(&phdr->p_filesz); /* Segment size in file */
222 bswaptls(&phdr->p_memsz); /* Segment size in memory */
223 bswaptls(&phdr->p_align); /* Segment alignment */
224 }
225 }
226
227 static void bswap_shdr(struct elf_shdr *shdr, int shnum)
228 {
229 if (!target_needs_bswap()) {
230 return;
231 }
232
233 for (int i = 0; i < shnum; ++i, ++shdr) {
234 bswap32s(&shdr->sh_name);
235 bswap32s(&shdr->sh_type);
236 bswaptls(&shdr->sh_flags);
237 bswaptls(&shdr->sh_addr);
238 bswaptls(&shdr->sh_offset);
239 bswaptls(&shdr->sh_size);
240 bswap32s(&shdr->sh_link);
241 bswap32s(&shdr->sh_info);
242 bswaptls(&shdr->sh_addralign);
243 bswaptls(&shdr->sh_entsize);
244 }
245 }
246
247 static void bswap_sym(struct elf_sym *sym)
248 {
249 if (!target_needs_bswap()) {
250 return;
251 }
252
253 bswap32s(&sym->st_name);
254 bswaptls(&sym->st_value);
255 bswaptls(&sym->st_size);
256 bswap16s(&sym->st_shndx);
257 }
258
259 #ifdef TARGET_MIPS
260 static void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags)
261 {
262 if (!target_needs_bswap()) {
263 return;
264 }
265
266 bswap16s(&abiflags->version);
267 bswap32s(&abiflags->ases);
268 bswap32s(&abiflags->isa_ext);
269 bswap32s(&abiflags->flags1);
270 bswap32s(&abiflags->flags2);
271 }
272 #endif
273
274 #ifdef HAVE_ELF_CORE_DUMP
275 static int elf_core_dump(int, const CPUArchState *);
276 #endif /* HAVE_ELF_CORE_DUMP */
277 static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
278 abi_ulong load_bias);
279
280 /* Verify the portions of EHDR within E_IDENT for the target.
281 This can be performed before bswapping the entire header. */
282 static bool elf_check_ident(struct elfhdr *ehdr)
283 {
284 return (ehdr->e_ident[EI_MAG0] == ELFMAG0
285 && ehdr->e_ident[EI_MAG1] == ELFMAG1
286 && ehdr->e_ident[EI_MAG2] == ELFMAG2
287 && ehdr->e_ident[EI_MAG3] == ELFMAG3
288 && ehdr->e_ident[EI_CLASS] == ELF_CLASS
289 && ehdr->e_ident[EI_DATA] == ELF_DATA
290 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
291 }
292
293 /* Verify the portions of EHDR outside of E_IDENT for the target.
294 This has to wait until after bswapping the header. */
295 static bool elf_check_ehdr(struct elfhdr *ehdr)
296 {
297 return (elf_check_machine(ehdr->e_machine)
298 && elf_check_abi(ehdr->e_flags)
299 && ehdr->e_ehsize == sizeof(struct elfhdr)
300 && ehdr->e_phentsize == sizeof(struct elf_phdr)
301 && (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN));
302 }
303
304 /*
305 * 'copy_elf_strings()' copies argument/environment strings from user
306 * memory to free pages in kernel mem. These are in a format ready
307 * to be put directly into the top of new user memory.
308 *
309 */
310 static abi_ulong copy_elf_strings(int argc, char **argv, char *scratch,
311 abi_ulong p, abi_ulong stack_limit)
312 {
313 char *tmp;
314 int len, i;
315 abi_ulong top = p;
316
317 if (!p) {
318 return 0; /* bullet-proofing */
319 }
320
321 if (STACK_GROWS_DOWN) {
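/* offset: number of bytes between the page boundary below p and p itself
   (a full TARGET_PAGE_SIZE when p is page-aligned); strings are packed
   into scratch from this offset downwards. */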
322 int offset = ((p - 1) % TARGET_PAGE_SIZE) + 1;
323 for (i = argc - 1; i >= 0; --i) {
324 tmp = argv[i];
325 if (!tmp) {
326 fprintf(stderr, "VFS: argc is wrong");
327 exit(-1);
328 }
329 len = strlen(tmp) + 1;
330 tmp += len;
331
332 if (len > (p - stack_limit)) {
333 return 0;
334 }
335 while (len) {
336 int bytes_to_copy = (len > offset) ? offset : len;
337 tmp -= bytes_to_copy;
338 p -= bytes_to_copy;
339 offset -= bytes_to_copy;
340 len -= bytes_to_copy;
341
342 memcpy_fromfs(scratch + offset, tmp, bytes_to_copy);
343
344 if (offset == 0) {
345 memcpy_to_target(p, scratch, top - p);
346 top = p;
347 offset = TARGET_PAGE_SIZE;
348 }
349 }
350 }
351 if (p != top) {
352 memcpy_to_target(p, scratch + offset, top - p);
353 }
354 } else {
355 int remaining = TARGET_PAGE_SIZE - (p % TARGET_PAGE_SIZE);
356 for (i = 0; i < argc; ++i) {
357 tmp = argv[i];
358 if (!tmp) {
359 fprintf(stderr, "VFS: argc is wrong");
360 exit(-1);
361 }
362 len = strlen(tmp) + 1;
363 if (len > (stack_limit - p)) {
364 return 0;
365 }
366 while (len) {
367 int bytes_to_copy = (len > remaining) ? remaining : len;
368
369 memcpy_fromfs(scratch + (p - top), tmp, bytes_to_copy);
370
371 tmp += bytes_to_copy;
372 remaining -= bytes_to_copy;
373 p += bytes_to_copy;
374 len -= bytes_to_copy;
375
376 if (remaining == 0) {
377 memcpy_to_target(top, scratch, p - top);
378 top = p;
379 remaining = TARGET_PAGE_SIZE;
380 }
381 }
382 }
383 if (p != top) {
384 memcpy_to_target(top, scratch, p - top);
385 }
386 }
387
388 return p;
389 }
390
391 /* Older linux kernels provide up to MAX_ARG_PAGES (default: 32) of
392 * argument/environment space. Newer kernels (>2.6.33) allow more,
393 * dependent on stack size, but guarantee at least 32 pages for
394 * backwards compatibility.
395 */
396 #define STACK_LOWER_LIMIT (32 * TARGET_PAGE_SIZE)
397
398 static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
399 struct image_info *info)
400 {
401 abi_ulong size, error, guard;
402 int prot;
403
404 size = guest_stack_size;
405 if (size < STACK_LOWER_LIMIT) {
406 size = STACK_LOWER_LIMIT;
407 }
408
409 if (STACK_GROWS_DOWN) {
410 guard = TARGET_PAGE_SIZE;
411 if (guard < qemu_real_host_page_size()) {
412 guard = qemu_real_host_page_size();
413 }
414 } else {
415 /* no guard page for hppa target where stack grows upwards. */
416 guard = 0;
417 }
418
419 prot = PROT_READ | PROT_WRITE;
420 if (info->exec_stack) {
421 prot |= PROT_EXEC;
422 }
423 error = target_mmap(0, size + guard, prot,
424 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
425 if (error == -1) {
426 perror("mmap stack");
427 exit(-1);
428 }
429
430 /* We reserve one extra page at the top of the stack as guard. */
431 if (STACK_GROWS_DOWN) {
432 target_mprotect(error, guard, PROT_NONE);
433 info->stack_limit = error + guard;
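/* Initial stack pointer: just below the top of the usable stack region. */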
434 return info->stack_limit + size - sizeof(void *);
435 } else {
436 info->stack_limit = error + size;
437 return error;
438 }
439 }
440
441 /**
442 * zero_bss:
443 *
444 * Map and zero the bss. We need to explicitly zero any fractional pages
445 * after the data section (i.e. bss). Return false on mapping failure.
446 */
447 static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss,
448 int prot, Error **errp)
449 {
450 abi_ulong align_bss;
451
452 /* We only expect writable bss; the code segment shouldn't need this. */
453 if (!(prot & PROT_WRITE)) {
454 error_setg(errp, "PT_LOAD with non-writable bss");
455 return false;
456 }
457
458 align_bss = TARGET_PAGE_ALIGN(start_bss);
459 end_bss = TARGET_PAGE_ALIGN(end_bss);
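/* [start_bss, align_bss) is the tail of the last page that also holds file
   data and must be cleared in place; [align_bss, end_bss) can be covered by
   a fresh anonymous mapping. */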
460
461 if (start_bss < align_bss) {
462 int flags = page_get_flags(start_bss);
463
464 if (!(flags & PAGE_RWX)) {
465 /*
466 * The whole address space of the executable was reserved
467 * at the start, therefore all pages will be VALID.
468 * But assuming there are no PROT_NONE PT_LOAD segments,
469 * a PROT_NONE page means no data, all bss, and we can
470 * simply extend the new anon mapping back to the start
471 * of the page of bss.
472 */
473 align_bss -= TARGET_PAGE_SIZE;
474 } else {
475 /*
476 * The start of the bss shares a page with something.
477 * The only thing that we expect is the data section,
478 * which would already be marked writable.
479 * Overlapping the RX code segment seems malformed.
480 */
481 if (!(flags & PAGE_WRITE)) {
482 error_setg(errp, "PT_LOAD with bss overlapping "
483 "non-writable page");
484 return false;
485 }
486
487 /* The page is already mapped and writable. */
488 memset(g2h_untagged(start_bss), 0, align_bss - start_bss);
489 }
490 }
491
492 if (align_bss < end_bss &&
493 target_mmap(align_bss, end_bss - align_bss, prot,
494 MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) {
495 error_setg_errno(errp, errno, "Error mapping bss");
496 return false;
497 }
498 return true;
499 }
500
501 #if defined(TARGET_ARM)
502 static int elf_is_fdpic(struct elfhdr *exec)
503 {
504 return exec->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC;
505 }
506 #elif defined(TARGET_XTENSA)
507 static int elf_is_fdpic(struct elfhdr *exec)
508 {
509 return exec->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC;
510 }
511 #else
512 /* Default implementation, always false. */
513 static int elf_is_fdpic(struct elfhdr *exec)
514 {
515 return 0;
516 }
517 #endif
518
519 static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong sp)
520 {
521 uint16_t n;
522 struct elf32_fdpic_loadseg *loadsegs = info->loadsegs;
523
524 /* elf32_fdpic_loadseg */
525 n = info->nsegs;
526 while (n--) {
527 sp -= 12;
528 put_user_u32(loadsegs[n].addr, sp+0);
529 put_user_u32(loadsegs[n].p_vaddr, sp+4);
530 put_user_u32(loadsegs[n].p_memsz, sp+8);
531 }
532
533 /* elf32_fdpic_loadmap */
534 sp -= 4;
535 put_user_u16(0, sp+0); /* version */
536 put_user_u16(info->nsegs, sp+2); /* nsegs */
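/* The 4-byte map header now sits just below the segment array, so from sp
   the guest sees a struct elf32_fdpic_loadmap immediately followed by its
   elf32_fdpic_loadseg entries at increasing addresses. */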
537
538 info->personality = PER_LINUX_FDPIC;
539 info->loadmap_addr = sp;
540
541 return sp;
542 }
543
544 static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
545 struct elfhdr *exec,
546 struct image_info *info,
547 struct image_info *interp_info,
548 struct image_info *vdso_info)
549 {
550 abi_ulong sp;
551 abi_ulong u_argc, u_argv, u_envp, u_auxv;
552 int size;
553 int i;
554 abi_ulong u_rand_bytes;
555 uint8_t k_rand_bytes[16];
556 abi_ulong u_platform, u_base_platform;
557 const char *k_platform, *k_base_platform;
558 const int n = sizeof(elf_addr_t);
559
560 sp = p;
561
562 /* Needs to be before we load the env/argc/... */
563 if (elf_is_fdpic(exec)) {
564 /* Need 4 byte alignment for these structs */
565 sp &= ~3;
566 sp = loader_build_fdpic_loadmap(info, sp);
567 info->other_info = interp_info;
568 if (interp_info) {
569 interp_info->other_info = info;
570 sp = loader_build_fdpic_loadmap(interp_info, sp);
571 info->interpreter_loadmap_addr = interp_info->loadmap_addr;
572 info->interpreter_pt_dynamic_addr = interp_info->pt_dynamic_addr;
573 } else {
574 info->interpreter_loadmap_addr = 0;
575 info->interpreter_pt_dynamic_addr = 0;
576 }
577 }
578
579 u_base_platform = 0;
580 k_base_platform = get_elf_base_platform(thread_cpu);
581 if (k_base_platform) {
582 size_t len = strlen(k_base_platform) + 1;
583 if (STACK_GROWS_DOWN) {
584 sp -= (len + n - 1) & ~(n - 1);
585 u_base_platform = sp;
586 /* FIXME - check return value of memcpy_to_target() for failure */
587 memcpy_to_target(sp, k_base_platform, len);
588 } else {
589 memcpy_to_target(sp, k_base_platform, len);
590 u_base_platform = sp;
591 sp += len + 1;
592 }
593 }
594
595 u_platform = 0;
596 k_platform = get_elf_platform(thread_cpu);
597 if (k_platform) {
598 size_t len = strlen(k_platform) + 1;
599 if (STACK_GROWS_DOWN) {
600 sp -= (len + n - 1) & ~(n - 1);
601 u_platform = sp;
602 /* FIXME - check return value of memcpy_to_target() for failure */
603 memcpy_to_target(sp, k_platform, len);
604 } else {
605 memcpy_to_target(sp, k_platform, len);
606 u_platform = sp;
607 sp += len + 1;
608 }
609 }
610
611 /* Provide 16 byte alignment for the PRNG, and basic alignment for
612 * the argv and envp pointers.
613 */
614 if (STACK_GROWS_DOWN) {
615 sp = QEMU_ALIGN_DOWN(sp, 16);
616 } else {
617 sp = QEMU_ALIGN_UP(sp, 16);
618 }
619
620 /*
621 * Generate 16 random bytes for userspace PRNG seeding.
622 */
623 qemu_guest_getrandom_nofail(k_rand_bytes, sizeof(k_rand_bytes));
624 if (STACK_GROWS_DOWN) {
625 sp -= 16;
626 u_rand_bytes = sp;
627 /* FIXME - check return value of memcpy_to_target() for failure */
628 memcpy_to_target(sp, k_rand_bytes, 16);
629 } else {
630 memcpy_to_target(sp, k_rand_bytes, 16);
631 u_rand_bytes = sp;
632 sp += 16;
633 }
634
635 size = (DLINFO_ITEMS + 1) * 2;
636 if (k_base_platform) {
637 size += 2;
638 }
639 if (k_platform) {
640 size += 2;
641 }
642 if (vdso_info) {
643 size += 2;
644 }
645 #ifdef DLINFO_ARCH_ITEMS
646 size += DLINFO_ARCH_ITEMS * 2;
647 #endif
648 if (HAVE_ELF_HWCAP2) {
649 size += 2;
650 }
651 info->auxv_len = size * n;
652
653 size += envc + argc + 2;
654 size += 1; /* argc itself */
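/* Beyond the auxv words counted above: the argv[] and envp[] pointer arrays
   (each NULL-terminated) plus one slot for argc. */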
655 size *= n;
656
657 /* Allocate space and finalize stack alignment for entry now. */
658 if (STACK_GROWS_DOWN) {
659 u_argc = QEMU_ALIGN_DOWN(sp - size, STACK_ALIGNMENT);
660 sp = u_argc;
661 } else {
662 u_argc = sp;
663 sp = QEMU_ALIGN_UP(sp + size, STACK_ALIGNMENT);
664 }
665
666 u_argv = u_argc + n;
667 u_envp = u_argv + (argc + 1) * n;
668 u_auxv = u_envp + (envc + 1) * n;
669 info->saved_auxv = u_auxv;
670 info->argc = argc;
671 info->envc = envc;
672 info->argv = u_argv;
673 info->envp = u_envp;
674
675 /* This is correct because Linux defines
676 * elf_addr_t as Elf32_Off / Elf64_Off
677 */
678 #define NEW_AUX_ENT(id, val) do { \
679 put_user_ual(id, u_auxv); u_auxv += n; \
680 put_user_ual(val, u_auxv); u_auxv += n; \
681 } while(0)
682
683 #ifdef ARCH_DLINFO
684 /*
685 * ARCH_DLINFO must come first so platform specific code can enforce
686 * special alignment requirements on the AUXV if necessary (eg. PPC).
687 */
688 ARCH_DLINFO;
689 #endif
690 /* There must be exactly DLINFO_ITEMS entries here, or the assert
691 * on info->auxv_len will trigger.
692 */
693 NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff));
694 NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr)));
695 NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum));
696 NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE));
697 NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? interp_info->load_addr : 0));
698 NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0);
699 NEW_AUX_ENT(AT_ENTRY, info->entry);
700 NEW_AUX_ENT(AT_UID, (abi_ulong) getuid());
701 NEW_AUX_ENT(AT_EUID, (abi_ulong) geteuid());
702 NEW_AUX_ENT(AT_GID, (abi_ulong) getgid());
703 NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid());
704 NEW_AUX_ENT(AT_HWCAP, get_elf_hwcap(thread_cpu));
705 NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK));
706 NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes);
707 NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE));
708 NEW_AUX_ENT(AT_EXECFN, info->file_string);
709
710 if (HAVE_ELF_HWCAP2) {
711 NEW_AUX_ENT(AT_HWCAP2, get_elf_hwcap2(thread_cpu));
712 }
713 if (u_base_platform) {
714 NEW_AUX_ENT(AT_BASE_PLATFORM, u_base_platform);
715 }
716 if (u_platform) {
717 NEW_AUX_ENT(AT_PLATFORM, u_platform);
718 }
719 if (vdso_info) {
720 NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
721 }
722 NEW_AUX_ENT (AT_NULL, 0);
723 #undef NEW_AUX_ENT
724
725 /* Check that our initial calculation of the auxv length matches how much
726 * we actually put into it.
727 */
728 assert(info->auxv_len == u_auxv - info->saved_auxv);
729
730 put_user_ual(argc, u_argc);
731
732 p = info->arg_strings;
733 for (i = 0; i < argc; ++i) {
734 put_user_ual(p, u_argv);
735 u_argv += n;
736 p += target_strlen(p) + 1;
737 }
738 put_user_ual(0, u_argv);
739
740 p = info->env_strings;
741 for (i = 0; i < envc; ++i) {
742 put_user_ual(p, u_envp);
743 u_envp += n;
744 p += target_strlen(p) + 1;
745 }
746 put_user_ual(0, u_envp);
747
748 return sp;
749 }
750
751 #if defined(HI_COMMPAGE)
752 #define LO_COMMPAGE -1
753 #elif defined(LO_COMMPAGE)
754 #define HI_COMMPAGE 0
755 #else
756 #define HI_COMMPAGE 0
757 #define LO_COMMPAGE -1
758 #ifndef HAVE_GUEST_COMMPAGE
759 bool init_guest_commpage(void) { return true; }
760 #endif
761 #endif
762
763 /**
764 * pgb_try_mmap:
765 * @addr: host start address
766 * @addr_last: host last address
767 * @keep: do not unmap the probe region
768 *
769 * Return 1 if [@addr, @addr_last] is not mapped in the host,
770 * return 0 if it is not available to map, and -1 on mmap error.
771 * If @keep, the region is left mapped on success, otherwise unmapped.
772 */
773 static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep)
774 {
775 size_t size = addr_last - addr + 1;
776 void *p = mmap((void *)addr, size, PROT_NONE,
777 MAP_ANONYMOUS | MAP_PRIVATE |
778 MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
779 int ret;
780
781 if (p == MAP_FAILED) {
782 return errno == EEXIST ? 0 : -1;
783 }
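/* A host kernel that does not know MAP_FIXED_NOREPLACE may ignore the flag
   and place the mapping elsewhere; that case yields ret == 0, i.e. the
   requested address is treated as unavailable. */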
784 ret = p == (void *)addr;
785 if (!keep || !ret) {
786 munmap(p, size);
787 }
788 return ret;
789 }
790
791 /**
792 * pgb_try_mmap_skip_brk:
793 * @addr: host start address
794 * @addr_last: host last address
795 * @brk: host brk
796 *
797 * Like pgb_try_mmap, but additionally reserve some memory following brk.
798 */
799 static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last,
800 uintptr_t brk, bool keep)
801 {
802 uintptr_t brk_last = brk + 16 * MiB - 1;
803
804 /* Do not map anything close to the host brk. */
805 if (addr <= brk_last && brk <= addr_last) {
806 return 0;
807 }
808 return pgb_try_mmap(addr, addr_last, keep);
809 }
810
811 /**
812 * pgb_try_mmap_set:
813 * @ga: set of guest addrs
814 * @base: guest_base
815 * @brk: host brk
816 *
817 * Return true if all @ga can be mapped by the host at @base.
818 * On success, retain the mapping at index 0 for reserved_va.
819 */
820
821 typedef struct PGBAddrs {
822 uintptr_t bounds[3][2]; /* start/last pairs */
823 int nbounds;
824 } PGBAddrs;
825
826 static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
827 {
828 for (int i = ga->nbounds - 1; i >= 0; --i) {
829 if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
830 ga->bounds[i][1] + base,
831 brk, i == 0 && reserved_va) <= 0) {
832 return false;
833 }
834 }
835 return true;
836 }
837
838 /**
839 * pgb_addr_set:
840 * @ga: output set of guest addrs
841 * @guest_loaddr: guest image low address
842 * @guest_hiaddr: guest image high address
843 * @try_identity: create for identity mapping
844 *
845 * Fill in @ga with the image, COMMPAGE and NULL page.
846 */
847 static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
848 abi_ulong guest_hiaddr, bool try_identity)
849 {
850 int n;
851
852 /*
853 * With a low commpage, or a guest mapped very low,
854 * we may not be able to use the identity map.
855 */
856 if (try_identity) {
857 if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
858 return false;
859 }
860 if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
861 return false;
862 }
863 }
864
865 memset(ga, 0, sizeof(*ga));
866 n = 0;
867
868 if (reserved_va) {
869 ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
870 ga->bounds[n][1] = reserved_va;
871 n++;
872 /* LO_COMMPAGE and NULL handled by reserving from 0. */
873 } else {
874 /* Add any LO_COMMPAGE or NULL page. */
875 if (LO_COMMPAGE != -1) {
876 ga->bounds[n][0] = 0;
877 ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1;
878 n++;
879 } else if (!try_identity) {
880 ga->bounds[n][0] = 0;
881 ga->bounds[n][1] = TARGET_PAGE_SIZE - 1;
882 n++;
883 }
884
885 /* Add the guest image for ET_EXEC. */
886 if (guest_loaddr) {
887 ga->bounds[n][0] = guest_loaddr;
888 ga->bounds[n][1] = guest_hiaddr;
889 n++;
890 }
891 }
892
893 /*
894 * Temporarily disable
895 * "comparison is always false due to limited range of data type"
896 * due to comparison between unsigned and (possible) 0.
897 */
898 #pragma GCC diagnostic push
899 #pragma GCC diagnostic ignored "-Wtype-limits"
900
901 /* Add any HI_COMMPAGE not covered by reserved_va. */
902 if (reserved_va < HI_COMMPAGE) {
903 ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask();
904 ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1;
905 n++;
906 }
907
908 #pragma GCC diagnostic pop
909
910 ga->nbounds = n;
911 return true;
912 }
913
914 static void pgb_fail_in_use(const char *image_name)
915 {
916 error_report("%s: requires virtual address space that is in use "
917 "(omit the -B option or choose a different value)",
918 image_name);
919 exit(EXIT_FAILURE);
920 }
921
922 static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr,
923 uintptr_t guest_hiaddr, uintptr_t align)
924 {
925 PGBAddrs ga;
926 uintptr_t brk = (uintptr_t)sbrk(0);
927
928 if (!QEMU_IS_ALIGNED(guest_base, align)) {
929 fprintf(stderr, "Requested guest base %p does not satisfy "
930 "host minimum alignment (0x%" PRIxPTR ")\n",
931 (void *)guest_base, align);
932 exit(EXIT_FAILURE);
933 }
934
935 if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base)
936 || !pgb_try_mmap_set(&ga, guest_base, brk)) {
937 pgb_fail_in_use(image_name);
938 }
939 }
940
941 /**
942 * pgb_find_fallback:
943 *
944 * This is a fallback method for finding holes in the host address space
945 * if we don't have the benefit of being able to access /proc/self/map.
946 * It can potentially take a very long time as we can only dumbly iterate
947 * up the host address space seeing if the allocation would work.
948 */
949 static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align,
950 uintptr_t brk)
951 {
952 /* TODO: come up with a better estimate of how much to skip. */
953 uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB;
954
955 for (uintptr_t base = skip; ; base += skip) {
956 base = ROUND_UP(base, align);
957 if (pgb_try_mmap_set(ga, base, brk)) {
958 return base;
959 }
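/* Stop before base + skip would wrap around the end of the host address space. */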
960 if (base >= -skip) {
961 return -1;
962 }
963 }
964 }
965
966 static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base,
967 IntervalTreeRoot *root)
968 {
969 for (int i = ga->nbounds - 1; i >= 0; --i) {
970 uintptr_t s = base + ga->bounds[i][0];
971 uintptr_t l = base + ga->bounds[i][1];
972 IntervalTreeNode *n;
973
974 if (l < s) {
975 /* Wraparound. Skip to advance S to mmap_min_addr. */
976 return mmap_min_addr - s;
977 }
978
979 n = interval_tree_iter_first(root, s, l);
980 if (n != NULL) {
981 /* Conflict. Skip to advance S to LAST + 1. */
982 return n->last - s + 1;
983 }
984 }
985 return 0; /* success */
986 }
987
988 static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root,
989 uintptr_t align, uintptr_t brk)
990 {
991 uintptr_t last = sizeof(uintptr_t) == 4 ? MiB : GiB;
992 uintptr_t base, skip;
993
994 while (true) {
995 base = ROUND_UP(last, align);
996 if (base < last) {
997 return -1;
998 }
999
1000 skip = pgb_try_itree(ga, base, root);
1001 if (skip == 0) {
1002 break;
1003 }
1004
1005 last = base + skip;
1006 if (last < base) {
1007 return -1;
1008 }
1009 }
1010
1011 /*
1012 * We've chosen 'base' based on holes in the interval tree,
1013 * but we don't yet know if it is a valid host address.
1014 * Because it is the first matching hole, if the host addresses
1015 * are invalid we know there are no further matches.
1016 */
1017 return pgb_try_mmap_set(ga, base, brk) ? base : -1;
1018 }
1019
1020 static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr,
1021 uintptr_t guest_hiaddr, uintptr_t align)
1022 {
1023 IntervalTreeRoot *root;
1024 uintptr_t brk, ret;
1025 PGBAddrs ga;
1026
1027 /* Try the identity map first. */
1028 if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) {
1029 brk = (uintptr_t)sbrk(0);
1030 if (pgb_try_mmap_set(&ga, 0, brk)) {
1031 guest_base = 0;
1032 return;
1033 }
1034 }
1035
1036 /*
1037 * Rebuild the address set for non-identity map.
1038 * This differs in the mapping of the guest NULL page.
1039 */
1040 pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false);
1041
1042 root = read_self_maps();
1043
1044 /* Read brk after we've read the maps, which will malloc. */
1045 brk = (uintptr_t)sbrk(0);
1046
1047 if (!root) {
1048 ret = pgb_find_fallback(&ga, align, brk);
1049 } else {
1050 /*
1051 * Reserve the area close to the host brk.
1052 * This will be freed with the rest of the tree.
1053 */
1054 IntervalTreeNode *b = g_new0(IntervalTreeNode, 1);
1055 b->start = brk;
1056 b->last = brk + 16 * MiB - 1;
1057 interval_tree_insert(b, root);
1058
1059 ret = pgb_find_itree(&ga, root, align, brk);
1060 free_self_maps(root);
1061 }
1062
1063 if (ret == -1) {
1064 int w = TARGET_LONG_BITS / 4;
1065
1066 error_report("%s: Unable to find a guest_base to satisfy all "
1067 "guest address mapping requirements", image_name);
1068
1069 for (int i = 0; i < ga.nbounds; ++i) {
1070 error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n",
1071 w, (uint64_t)ga.bounds[i][0],
1072 w, (uint64_t)ga.bounds[i][1]);
1073 }
1074 exit(EXIT_FAILURE);
1075 }
1076 guest_base = ret;
1077 }
1078
1079 void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
1080 abi_ulong guest_hiaddr)
1081 {
1082 /* In order to use host shmat, we must be able to honor SHMLBA. */
1083 uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE);
1084
1085 /* Sanity check the guest binary. */
1086 if (reserved_va) {
1087 if (guest_hiaddr > reserved_va) {
1088 error_report("%s: requires more than reserved virtual "
1089 "address space (0x%" PRIx64 " > 0x%lx)",
1090 image_name, (uint64_t)guest_hiaddr, reserved_va);
1091 exit(EXIT_FAILURE);
1092 }
1093 } else {
1094 if (guest_hiaddr != (uintptr_t)guest_hiaddr) {
1095 error_report("%s: requires more virtual address space "
1096 "than the host can provide (0x%" PRIx64 ")",
1097 image_name, (uint64_t)guest_hiaddr + 1);
1098 exit(EXIT_FAILURE);
1099 }
1100 }
1101
1102 if (have_guest_base) {
1103 pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align);
1104 } else {
1105 pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align);
1106 }
1107
1108 /* Reserve and initialize the commpage. */
1109 if (!init_guest_commpage()) {
1110 /* We have already probed for the commpage being free. */
1111 g_assert_not_reached();
1112 }
1113
1114 assert(QEMU_IS_ALIGNED(guest_base, align));
1115 qemu_log_mask(CPU_LOG_PAGE, "Locating guest address space "
1116 "@ 0x%" PRIx64 "\n", (uint64_t)guest_base);
1117 }
1118
1119 enum {
1120 /* The string "GNU\0" as a magic number. */
1121 GNU0_MAGIC = const_le32('G' | 'N' << 8 | 'U' << 16),
1122 NOTE_DATA_SZ = 1 * KiB,
1123 NOTE_NAME_SZ = 4,
1124 ELF_GNU_PROPERTY_ALIGN = ELF_CLASS == ELFCLASS32 ? 4 : 8,
1125 };
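/* NOTE_DATA_SZ bounds how much of a PT_GNU_PROPERTY segment we are willing
   to read into the on-stack buffer in parse_elf_properties() below. */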
1126
1127 /*
1128 * Process a single gnu_property entry.
1129 * Return false for error.
1130 */
1131 static bool parse_elf_property(const uint32_t *data, int *off, int datasz,
1132 struct image_info *info, bool have_prev_type,
1133 uint32_t *prev_type, Error **errp)
1134 {
1135 uint32_t pr_type, pr_datasz, step;
1136
1137 if (*off > datasz || !QEMU_IS_ALIGNED(*off, ELF_GNU_PROPERTY_ALIGN)) {
1138 goto error_data;
1139 }
1140 datasz -= *off;
1141 data += *off / sizeof(uint32_t);
1142
1143 if (datasz < 2 * sizeof(uint32_t)) {
1144 goto error_data;
1145 }
1146 pr_type = data[0];
1147 pr_datasz = data[1];
1148 data += 2;
1149 datasz -= 2 * sizeof(uint32_t);
1150 step = ROUND_UP(pr_datasz, ELF_GNU_PROPERTY_ALIGN);
1151 if (step > datasz) {
1152 goto error_data;
1153 }
1154
1155 /* Properties are supposed to be unique and sorted on pr_type. */
1156 if (have_prev_type && pr_type <= *prev_type) {
1157 if (pr_type == *prev_type) {
1158 error_setg(errp, "Duplicate property in PT_GNU_PROPERTY");
1159 } else {
1160 error_setg(errp, "Unsorted property in PT_GNU_PROPERTY");
1161 }
1162 return false;
1163 }
1164 *prev_type = pr_type;
1165
1166 if (!arch_parse_elf_property(pr_type, pr_datasz, data, info, errp)) {
1167 return false;
1168 }
1169
1170 *off += 2 * sizeof(uint32_t) + step;
1171 return true;
1172
1173 error_data:
1174 error_setg(errp, "Ill-formed property in PT_GNU_PROPERTY");
1175 return false;
1176 }
1177
1178 /* Process NT_GNU_PROPERTY_TYPE_0. */
1179 static bool parse_elf_properties(const ImageSource *src,
1180 struct image_info *info,
1181 const struct elf_phdr *phdr,
1182 Error **errp)
1183 {
1184 union {
1185 struct elf_note nhdr;
1186 uint32_t data[NOTE_DATA_SZ / sizeof(uint32_t)];
1187 } note;
1188
1189 int n, off, datasz;
1190 bool have_prev_type;
1191 uint32_t prev_type;
1192
1193 /* Unless the arch requires properties, ignore them. */
1194 if (!HAVE_ELF_GNU_PROPERTY) {
1195 return true;
1196 }
1197
1198 /* If the properties are crazy large, that's too bad. */
1199 n = phdr->p_filesz;
1200 if (n > sizeof(note)) {
1201 error_setg(errp, "PT_GNU_PROPERTY too large");
1202 return false;
1203 }
1204 if (n < sizeof(note.nhdr)) {
1205 error_setg(errp, "PT_GNU_PROPERTY too small");
1206 return false;
1207 }
1208
1209 if (!imgsrc_read(&note, phdr->p_offset, n, src, errp)) {
1210 return false;
1211 }
1212
1213 /*
1214 * The contents of a valid PT_GNU_PROPERTY is a sequence of uint32_t.
1215 * Swap most of them now, beyond the header and namesz.
1216 */
1217 if (target_needs_bswap()) {
1218 for (int i = 4; i < n / 4; i++) {
1219 bswap32s(note.data + i);
1220 }
1221 }
1222
1223 /*
1224 * Note that nhdr is 3 words, and that the "name" described by namesz
1225 * immediately follows nhdr and is thus at the 4th word. Further, all
1226 * of the inputs to the kernel's round_up are multiples of 4.
1227 */
1228 if (tswap32(note.nhdr.n_type) != NT_GNU_PROPERTY_TYPE_0 ||
1229 tswap32(note.nhdr.n_namesz) != NOTE_NAME_SZ ||
1230 note.data[3] != GNU0_MAGIC) {
1231 error_setg(errp, "Invalid note in PT_GNU_PROPERTY");
1232 return false;
1233 }
1234 off = sizeof(note.nhdr) + NOTE_NAME_SZ;
1235
1236 datasz = tswap32(note.nhdr.n_descsz) + off;
1237 if (datasz > n) {
1238 error_setg(errp, "Invalid note size in PT_GNU_PROPERTY");
1239 return false;
1240 }
1241
1242 have_prev_type = false;
1243 prev_type = 0;
1244 while (1) {
1245 if (off == datasz) {
1246 return true; /* end, exit ok */
1247 }
1248 if (!parse_elf_property(note.data, &off, datasz, info,
1249 have_prev_type, &prev_type, errp)) {
1250 return false;
1251 }
1252 have_prev_type = true;
1253 }
1254 }
1255
1256 /**
1257 * load_elf_image: Load an ELF image into the address space.
1258 * @image_name: the filename of the image, to use in error messages.
1259 * @src: the ImageSource from which to read.
1260 * @info: info collected from the loaded image.
1261 * @ehdr: the ELF header, not yet bswapped.
1262 * @pinterp_name: record any PT_INTERP string found.
1263 *
1264 * On return: @info values will be filled in, as necessary or available.
1265 */
1266
1267 static void load_elf_image(const char *image_name, const ImageSource *src,
1268 struct image_info *info, struct elfhdr *ehdr,
1269 char **pinterp_name)
1270 {
1271 g_autofree struct elf_phdr *phdr = NULL;
1272 abi_ulong load_addr, load_bias, loaddr, hiaddr, error, align;
1273 size_t reserve_size, align_size;
1274 int i, prot_exec;
1275 Error *err = NULL;
1276
1277 /*
1278 * First of all, some simple consistency checks.
1279 * Note that we rely on the bswapped ehdr staying in bprm_buf,
1280 * for later use by load_elf_binary and create_elf_tables.
1281 */
1282 if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) {
1283 goto exit_errmsg;
1284 }
1285 if (!elf_check_ident(ehdr)) {
1286 error_setg(&err, "Invalid ELF image for this architecture");
1287 goto exit_errmsg;
1288 }
1289 bswap_ehdr(ehdr);
1290 if (!elf_check_ehdr(ehdr)) {
1291 error_setg(&err, "Invalid ELF image for this architecture");
1292 goto exit_errmsg;
1293 }
1294
1295 phdr = imgsrc_read_alloc(ehdr->e_phoff,
1296 ehdr->e_phnum * sizeof(struct elf_phdr),
1297 src, &err);
1298 if (phdr == NULL) {
1299 goto exit_errmsg;
1300 }
1301 bswap_phdr(phdr, ehdr->e_phnum);
1302
1303 info->nsegs = 0;
1304 info->pt_dynamic_addr = 0;
1305
1306 mmap_lock();
1307
1308 /*
1309 * Find the maximum size of the image and allocate an appropriate
1310 * amount of memory to handle that. Locate the interpreter, if any.
1311 */
1312 loaddr = -1, hiaddr = 0;
1313 align = 0;
1314 info->exec_stack = EXSTACK_DEFAULT;
1315 for (i = 0; i < ehdr->e_phnum; ++i) {
1316 struct elf_phdr *eppnt = phdr + i;
1317 if (eppnt->p_type == PT_LOAD) {
1318 abi_ulong a = eppnt->p_vaddr & TARGET_PAGE_MASK;
1319 if (a < loaddr) {
1320 loaddr = a;
1321 }
1322 a = eppnt->p_vaddr + eppnt->p_memsz - 1;
1323 if (a > hiaddr) {
1324 hiaddr = a;
1325 }
1326 ++info->nsegs;
1327 align |= eppnt->p_align;
1328 } else if (eppnt->p_type == PT_INTERP && pinterp_name) {
1329 g_autofree char *interp_name = NULL;
1330
1331 if (*pinterp_name) {
1332 error_setg(&err, "Multiple PT_INTERP entries");
1333 goto exit_errmsg;
1334 }
1335
1336 interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz,
1337 src, &err);
1338 if (interp_name == NULL) {
1339 goto exit_errmsg;
1340 }
1341 if (interp_name[eppnt->p_filesz - 1] != 0) {
1342 error_setg(&err, "Invalid PT_INTERP entry");
1343 goto exit_errmsg;
1344 }
1345 *pinterp_name = g_steal_pointer(&interp_name);
1346 } else if (eppnt->p_type == PT_GNU_PROPERTY) {
1347 if (!parse_elf_properties(src, info, eppnt, &err)) {
1348 goto exit_errmsg;
1349 }
1350 } else if (eppnt->p_type == PT_GNU_STACK) {
1351 info->exec_stack = eppnt->p_flags & PF_X;
1352 }
1353 }
1354
1355 load_addr = loaddr;
1356
1357 align = pow2ceil(align);
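/* The p_align values were OR'ed together above; rounding up to a power of
   two yields a single alignment that satisfies every PT_LOAD segment. */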
1358
1359 if (pinterp_name != NULL) {
1360 if (ehdr->e_type == ET_EXEC) {
1361 /*
1362 * Make sure that the low address does not conflict with
1363 * MMAP_MIN_ADDR or the QEMU application itself.
1364 */
1365 probe_guest_base(image_name, loaddr, hiaddr);
1366 } else {
1367 /*
1368 * The binary is dynamic, but we still need to
1369 * select guest_base. In this case we pass a size.
1370 */
1371 probe_guest_base(image_name, 0, hiaddr - loaddr);
1372
1373 /*
1374 * Avoid collision with the loader by providing a different
1375 * default load address.
1376 */
1377 load_addr += elf_et_dyn_base;
1378
1379 /*
1380 * TODO: Better support for mmap alignment is desirable.
1381 * Since we do not have complete control over the guest
1382 * address space, we prefer the kernel to choose some address
1383 * rather than force the use of LOAD_ADDR via MAP_FIXED.
1384 */
1385 if (align) {
1386 load_addr &= -align;
1387 }
1388 }
1389 }
1390
1391 /*
1392 * Reserve address space for all of this.
1393 *
1394 * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get
1395 * exactly the address range that is required. Without reserved_va,
1396 * the guest address space is not isolated. We have attempted to avoid
1397 * conflict with the host program itself via probe_guest_base, but using
1398 * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check.
1399 *
1400 * Otherwise this is ET_DYN, and we are searching for a location
1401 * that can hold the memory space required. If the image is
1402 * pre-linked, LOAD_ADDR will be non-zero, and the kernel should
1403 * honor that address if it happens to be free.
1404 *
1405 * In both cases, we will overwrite pages in this range with mappings
1406 * from the executable.
1407 */
1408 reserve_size = (size_t)hiaddr - loaddr + 1;
1409 align_size = reserve_size;
1410
1411 if (ehdr->e_type != ET_EXEC && align > qemu_real_host_page_size()) {
1412 align_size += align - 1;
1413 }
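/* Over-allocate by align - 1 so that a suitably aligned start address can
   be carved out of the reservation and the excess trimmed below. */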
1414
1415 load_addr = target_mmap(load_addr, align_size, PROT_NONE,
1416 MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
1417 (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0),
1418 -1, 0);
1419 if (load_addr == -1) {
1420 goto exit_mmap;
1421 }
1422
1423 if (align_size != reserve_size) {
1424 abi_ulong align_addr = ROUND_UP(load_addr, align);
1425 abi_ulong align_end = TARGET_PAGE_ALIGN(align_addr + reserve_size);
1426 abi_ulong load_end = TARGET_PAGE_ALIGN(load_addr + align_size);
1427
1428 if (align_addr != load_addr) {
1429 target_munmap(load_addr, align_addr - load_addr);
1430 }
1431 if (align_end != load_end) {
1432 target_munmap(align_end, load_end - align_end);
1433 }
1434 load_addr = align_addr;
1435 }
1436
1437 load_bias = load_addr - loaddr;
1438
1439 if (elf_is_fdpic(ehdr)) {
1440 struct elf32_fdpic_loadseg *loadsegs = info->loadsegs =
1441 g_malloc(sizeof(*loadsegs) * info->nsegs);
1442
1443 for (i = 0; i < ehdr->e_phnum; ++i) {
1444 switch (phdr[i].p_type) {
1445 case PT_DYNAMIC:
1446 info->pt_dynamic_addr = phdr[i].p_vaddr + load_bias;
1447 break;
1448 case PT_LOAD:
1449 loadsegs->addr = phdr[i].p_vaddr + load_bias;
1450 loadsegs->p_vaddr = phdr[i].p_vaddr;
1451 loadsegs->p_memsz = phdr[i].p_memsz;
1452 ++loadsegs;
1453 break;
1454 }
1455 }
1456 }
1457
1458 info->load_bias = load_bias;
1459 info->code_offset = load_bias;
1460 info->data_offset = load_bias;
1461 info->load_addr = load_addr;
1462 info->entry = ehdr->e_entry + load_bias;
1463 info->start_code = -1;
1464 info->end_code = 0;
1465 info->start_data = -1;
1466 info->end_data = 0;
1467 /* Usual start for brk is after all sections of the main executable. */
1468 info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias);
1469 info->elf_flags = ehdr->e_flags;
1470
1471 prot_exec = PROT_EXEC;
1472 #ifdef TARGET_AARCH64
1473 /*
1474 * If the BTI feature is present, this indicates that the executable
1475 * pages of the startup binary should be mapped with PROT_BTI, so that
1476 * branch targets are enforced.
1477 *
1478 * The startup binary is either the interpreter or the static executable.
1479 * The interpreter is responsible for all pages of a dynamic executable.
1480 *
1481 * Elf notes are backward compatible to older cpus.
1482 * Do not enable BTI unless it is supported.
1483 */
1484 if ((info->note_flags & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
1485 && (pinterp_name == NULL || *pinterp_name == 0)
1486 && cpu_isar_feature(aa64_bti, ARM_CPU(thread_cpu))) {
1487 prot_exec |= TARGET_PROT_BTI;
1488 }
1489 #endif
1490
1491 for (i = 0; i < ehdr->e_phnum; i++) {
1492 struct elf_phdr *eppnt = phdr + i;
1493 if (eppnt->p_type == PT_LOAD) {
1494 abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
1495 int elf_prot = 0;
1496
1497 if (eppnt->p_flags & PF_R) {
1498 elf_prot |= PROT_READ;
1499 }
1500 if (eppnt->p_flags & PF_W) {
1501 elf_prot |= PROT_WRITE;
1502 }
1503 if (eppnt->p_flags & PF_X) {
1504 elf_prot |= prot_exec;
1505 }
1506
1507 vaddr = load_bias + eppnt->p_vaddr;
1508 vaddr_po = vaddr & ~TARGET_PAGE_MASK;
1509 vaddr_ps = vaddr & TARGET_PAGE_MASK;
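/* vaddr_ps is vaddr rounded down to a target page boundary; vaddr_po is the
   offset within that page, used to bias the file offset so the mapping
   stays page-aligned. */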
1510
1511 vaddr_ef = vaddr + eppnt->p_filesz;
1512 vaddr_em = vaddr + eppnt->p_memsz;
1513
1514 /*
1515 * Some segments may be completely empty, with a non-zero p_memsz
1516 * but no backing file segment.
1517 */
1518 if (eppnt->p_filesz != 0) {
1519 error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po,
1520 elf_prot, MAP_PRIVATE | MAP_FIXED,
1521 src, eppnt->p_offset - vaddr_po);
1522 if (error == -1) {
1523 goto exit_mmap;
1524 }
1525 }
1526
1527 /* If the load segment requests extra zeros (e.g. bss), map it. */
1528 if (vaddr_ef < vaddr_em &&
1529 !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) {
1530 goto exit_errmsg;
1531 }
1532
1533 /* Find the full program boundaries. */
1534 if (elf_prot & PROT_EXEC) {
1535 if (vaddr < info->start_code) {
1536 info->start_code = vaddr;
1537 }
1538 if (vaddr_ef > info->end_code) {
1539 info->end_code = vaddr_ef;
1540 }
1541 }
1542 if (elf_prot & PROT_WRITE) {
1543 if (vaddr < info->start_data) {
1544 info->start_data = vaddr;
1545 }
1546 if (vaddr_ef > info->end_data) {
1547 info->end_data = vaddr_ef;
1548 }
1549 }
1550 #ifdef TARGET_MIPS
1551 } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
1552 Mips_elf_abiflags_v0 abiflags;
1553
1554 if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags),
1555 src, &err)) {
1556 goto exit_errmsg;
1557 }
1558 bswap_mips_abiflags(&abiflags);
1559 info->fp_abi = abiflags.fp_abi;
1560 #endif
1561 }
1562 }
1563
1564 if (info->end_data == 0) {
1565 info->start_data = info->end_code;
1566 info->end_data = info->end_code;
1567 }
1568
1569 if (qemu_log_enabled()) {
1570 load_symbols(ehdr, src, load_bias);
1571 }
1572
1573 debuginfo_report_elf(image_name, src->fd, load_bias);
1574
1575 mmap_unlock();
1576
1577 close(src->fd);
1578 return;
1579
1580 exit_mmap:
1581 error_setg_errno(&err, errno, "Error mapping file");
1582 goto exit_errmsg;
1583 exit_errmsg:
1584 error_reportf_err(err, "%s: ", image_name);
1585 exit(-1);
1586 }
1587
1588 static void load_elf_interp(const char *filename, struct image_info *info,
1589 char bprm_buf[BPRM_BUF_SIZE])
1590 {
1591 struct elfhdr ehdr;
1592 ImageSource src;
1593 int fd, retval;
1594 Error *err = NULL;
1595
1596 fd = open(path(filename), O_RDONLY);
1597 if (fd < 0) {
1598 error_setg_file_open(&err, errno, filename);
1599 error_report_err(err);
1600 exit(-1);
1601 }
1602
1603 retval = read(fd, bprm_buf, BPRM_BUF_SIZE);
1604 if (retval < 0) {
1605 error_setg_errno(&err, errno, "Error reading file header");
1606 error_reportf_err(err, "%s: ", filename);
1607 exit(-1);
1608 }
1609
1610 src.fd = fd;
1611 src.cache = bprm_buf;
1612 src.cache_size = retval;
1613
1614 load_elf_image(filename, &src, info, &ehdr, NULL);
1615 }
1616
1617 #ifndef HAVE_VDSO_IMAGE_INFO
1618 const VdsoImageInfo *get_vdso_image_info(uint32_t elf_flags)
1619 {
1620 #ifdef VDSO_HEADER
1621 #include VDSO_HEADER
1622 return &vdso_image_info;
1623 #else
1624 return NULL;
1625 #endif
1626 }
1627 #endif /* HAVE_VDSO_IMAGE_INFO */
1628
1629 static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso)
1630 {
1631 ImageSource src;
1632 struct elfhdr ehdr;
1633 abi_ulong load_bias, load_addr;
1634
1635 src.fd = -1;
1636 src.cache = vdso->image;
1637 src.cache_size = vdso->image_size;
1638
1639 load_elf_image("<internal-vdso>", &src, info, &ehdr, NULL);
1640 load_addr = info->load_addr;
1641 load_bias = info->load_bias;
1642
1643 /*
1644 * We need to relocate the VDSO image. The one built into the kernel
1645 * is built for a fixed address. The one built for QEMU is not, since
1646 * that requires close control of the guest address space.
1647 * We pre-processed the image to locate all of the addresses that need
1648 * to be updated.
1649 */
1650 for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) {
1651 abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]);
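/* The word in guest memory is target-endian: swap to host order, apply the
   load bias, then swap back. */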
1652 *addr = tswapal(tswapal(*addr) + load_bias);
1653 }
1654
1655 /* Install signal trampolines, if present. */
1656 if (vdso->sigreturn_ofs) {
1657 default_sigreturn = load_addr + vdso->sigreturn_ofs;
1658 }
1659 if (vdso->rt_sigreturn_ofs) {
1660 default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs;
1661 }
1662 if (vdso->sigreturn_region_start_ofs) {
1663 vdso_sigreturn_region_start =
1664 load_addr + vdso->sigreturn_region_start_ofs;
1665 vdso_sigreturn_region_end = load_addr + vdso->sigreturn_region_end_ofs;
1666 }
1667
1668 /* Remove write from VDSO segment. */
1669 target_mprotect(info->start_data, info->end_data - info->start_data,
1670 PROT_READ | PROT_EXEC);
1671 }
1672
1673 static int symfind(const void *s0, const void *s1)
1674 {
1675 struct elf_sym *sym = (struct elf_sym *)s1;
1676 __typeof(sym->st_value) addr = *(uint64_t *)s0;
1677 int result = 0;
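/* bsearch comparator: the key is a code address, each table entry covers
   [st_value, st_value + st_size). */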
1678
1679 if (addr < sym->st_value) {
1680 result = -1;
1681 } else if (addr >= sym->st_value + sym->st_size) {
1682 result = 1;
1683 }
1684 return result;
1685 }
1686
1687 static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
1688 {
1689 #if ELF_CLASS == ELFCLASS32
1690 struct elf_sym *syms = s->disas_symtab.elf32;
1691 #else
1692 struct elf_sym *syms = s->disas_symtab.elf64;
1693 #endif
1694
1695 // binary search
1696 struct elf_sym *sym;
1697
1698 sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), symfind);
1699 if (sym != NULL) {
1700 return s->disas_strtab + sym->st_name;
1701 }
1702
1703 return "";
1704 }
1705
1706 /* FIXME: This should use elf_ops.h.inc */
1707 static int symcmp(const void *s0, const void *s1)
1708 {
1709 struct elf_sym *sym0 = (struct elf_sym *)s0;
1710 struct elf_sym *sym1 = (struct elf_sym *)s1;
1711 return (sym0->st_value < sym1->st_value)
1712 ? -1
1713 : ((sym0->st_value > sym1->st_value) ? 1 : 0);
1714 }
1715
1716 /* Best attempt to load symbols from this ELF object. */
1717 static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
1718 abi_ulong load_bias)
1719 {
1720 int i, shnum, nsyms, sym_idx = 0, str_idx = 0;
1721 g_autofree struct elf_shdr *shdr = NULL;
1722 char *strings = NULL;
1723 struct elf_sym *syms = NULL;
1724 struct elf_sym *new_syms;
1725 uint64_t segsz;
1726
1727 shnum = hdr->e_shnum;
1728 shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr),
1729 src, NULL);
1730 if (shdr == NULL) {
1731 return;
1732 }
1733
1734 bswap_shdr(shdr, shnum);
1735 for (i = 0; i < shnum; ++i) {
1736 if (shdr[i].sh_type == SHT_SYMTAB) {
1737 sym_idx = i;
1738 str_idx = shdr[i].sh_link;
1739 goto found;
1740 }
1741 }
1742
1743 /* There will be no symbol table if the file was stripped. */
1744 return;
1745
1746 found:
1747 /* Now know where the strtab and symtab are. Snarf them. */
1748
1749 segsz = shdr[str_idx].sh_size;
1750 strings = g_try_malloc(segsz);
1751 if (!strings) {
1752 goto give_up;
1753 }
1754 if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) {
1755 goto give_up;
1756 }
1757
1758 segsz = shdr[sym_idx].sh_size;
1759 if (segsz / sizeof(struct elf_sym) > INT_MAX) {
1760 /*
1761 * Implausibly large symbol table: give up rather than ploughing
1762 * on with the number of symbols calculation overflowing.
1763 */
1764 goto give_up;
1765 }
1766 nsyms = segsz / sizeof(struct elf_sym);
1767 syms = g_try_malloc(segsz);
1768 if (!syms) {
1769 goto give_up;
1770 }
1771 if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) {
1772 goto give_up;
1773 }
1774
1775 for (i = 0; i < nsyms; ) {
1776 bswap_sym(syms + i);
1777 /* Throw away entries which we do not need. */
1778 if (syms[i].st_shndx == SHN_UNDEF
1779 || syms[i].st_shndx >= SHN_LORESERVE
1780 || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) {
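/* Discard by moving the last entry into this slot; i is not advanced, so
   the moved entry is examined on the next pass. */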
1781 if (i < --nsyms) {
1782 syms[i] = syms[nsyms];
1783 }
1784 } else {
1785 #if defined(TARGET_ARM) || defined (TARGET_MIPS)
1786 /* The bottom address bit marks a Thumb or MIPS16 symbol. */
1787 syms[i].st_value &= ~(target_ulong)1;
1788 #endif
1789 syms[i].st_value += load_bias;
1790 i++;
1791 }
1792 }
1793
1794 /* No "useful" symbol. */
1795 if (nsyms == 0) {
1796 goto give_up;
1797 }
1798
1799 /*
1800 * Attempt to free the storage associated with the local symbols
1801 * that we threw away. Whether or not this has any effect on the
1802 * memory allocation depends on the malloc implementation and how
1803 * many symbols we managed to discard.
1804 */
1805 new_syms = g_try_renew(struct elf_sym, syms, nsyms);
1806 if (new_syms == NULL) {
1807 goto give_up;
1808 }
1809 syms = new_syms;
1810
1811 qsort(syms, nsyms, sizeof(*syms), symcmp);
1812
1813 {
1814 struct syminfo *s = g_new(struct syminfo, 1);
1815
1816 s->disas_strtab = strings;
1817 s->disas_num_syms = nsyms;
1818 #if ELF_CLASS == ELFCLASS32
1819 s->disas_symtab.elf32 = syms;
1820 #else
1821 s->disas_symtab.elf64 = syms;
1822 #endif
1823 s->lookup_symbol = lookup_symbolxx;
1824 s->next = syminfos;
1825 syminfos = s;
1826 }
1827 return;
1828
1829 give_up:
1830 g_free(strings);
1831 g_free(syms);
1832 }
1833
1834 uint32_t get_elf_eflags(int fd)
1835 {
1836 struct elfhdr ehdr;
1837 off_t offset;
1838 int ret;
1839
1840 /* Read ELF header */
1841 offset = lseek(fd, 0, SEEK_SET);
1842 if (offset == (off_t) -1) {
1843 return 0;
1844 }
1845 ret = read(fd, &ehdr, sizeof(ehdr));
1846 if (ret < sizeof(ehdr)) {
1847 return 0;
1848 }
1849 offset = lseek(fd, offset, SEEK_SET);
1850 if (offset == (off_t) -1) {
1851 return 0;
1852 }
1853
1854 /* Check ELF signature */
1855 if (!elf_check_ident(&ehdr)) {
1856 return 0;
1857 }
1858
1859 /* check header */
1860 bswap_ehdr(&ehdr);
1861 if (!elf_check_ehdr(&ehdr)) {
1862 return 0;
1863 }
1864
1865 /* return architecture id */
1866 return ehdr.e_flags;
1867 }
1868
1869 int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
1870 {
1871 /*
1872 * We need a copy of the elf header for passing to create_elf_tables.
1873 * We will have overwritten the original when we re-use bprm->buf
1874 * while loading the interpreter. Allocate the storage for this now
1875 * and let elf_load_image do any swapping that may be required.
1876 */
1877 struct elfhdr ehdr;
1878 struct image_info interp_info, vdso_info;
1879 char *elf_interpreter = NULL;
1880 char *scratch;
1881
1882 memset(&interp_info, 0, sizeof(interp_info));
1883 #ifdef TARGET_MIPS
1884 interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN;
1885 #endif
1886
1887 load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter);
1888
1889 /* Do this so that we can load the interpreter, if need be. We will
1890 change some of these later */
1891 bprm->p = setup_arg_pages(bprm, info);
1892
1893 scratch = g_new0(char, TARGET_PAGE_SIZE);
1894 if (STACK_GROWS_DOWN) {
1895 bprm->p = copy_elf_strings(1, &bprm->filename, scratch,
1896 bprm->p, info->stack_limit);
1897 info->file_string = bprm->p;
1898 bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch,
1899 bprm->p, info->stack_limit);
1900 info->env_strings = bprm->p;
1901 bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch,
1902 bprm->p, info->stack_limit);
1903 info->arg_strings = bprm->p;
1904 } else {
1905 info->arg_strings = bprm->p;
1906 bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch,
1907 bprm->p, info->stack_limit);
1908 info->env_strings = bprm->p;
1909 bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch,
1910 bprm->p, info->stack_limit);
1911 info->file_string = bprm->p;
1912 bprm->p = copy_elf_strings(1, &bprm->filename, scratch,
1913 bprm->p, info->stack_limit);
1914 }
1915
1916 g_free(scratch);
1917
1918 if (!bprm->p) {
1919 fprintf(stderr, "%s: %s\n", bprm->filename, strerror(E2BIG));
1920 exit(-1);
1921 }
1922
1923 if (elf_interpreter) {
1924 load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
1925
1926 /*
1927 * While unusual because of ELF_ET_DYN_BASE, if we are unlucky
1928 * with the mappings, the interpreter can be loaded above but
1929 * near the main executable, which can leave very little room
1930 * for the heap.
1931 * If there is less than 16 MB between the current brk and the
1932 * interpreter, use the end of the interpreter as the brk instead.
1933 */
1934 if (interp_info.brk > info->brk &&
1935 interp_info.load_bias - info->brk < 16 * MiB) {
1936 info->brk = interp_info.brk;
1937 }
1938
1939 /* If the program interpreter is one of these two, then assume
1940 an iBCS2 image. Otherwise assume a native linux image. */
1941
1942 if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0
1943 || strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0) {
1944 info->personality = PER_SVR4;
1945
1946 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1947 and some applications "depend" upon this behavior. Since
1948 we do not have the power to recompile these, we emulate
1949 the SVr4 behavior. Sigh. */
1950 target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC,
1951 MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS,
1952 -1, 0);
1953 }
1954 #ifdef TARGET_MIPS
1955 info->interp_fp_abi = interp_info.fp_abi;
1956 #endif
1957 }
1958
1959 /*
1960 * Load a vdso if available, which will amongst other things contain the
1961 * signal trampolines. Otherwise, allocate a separate page for them.
1962 */
1963 const VdsoImageInfo *vdso = get_vdso_image_info(info->elf_flags);
1964 if (vdso) {
1965 load_elf_vdso(&vdso_info, vdso);
1966 info->vdso = vdso_info.load_bias;
1967 } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
1968 abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE,
1969 PROT_READ | PROT_WRITE,
1970 MAP_PRIVATE | MAP_ANON, -1, 0);
1971 if (tramp_page == -1) {
1972 return -errno;
1973 }
1974
1975 setup_sigtramp(tramp_page);
1976 target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC);
1977 vdso_sigreturn_region_start = tramp_page;
1978 vdso_sigreturn_region_end = tramp_page + TARGET_PAGE_SIZE;
1979 }
1980
1981 bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info,
1982 elf_interpreter ? &interp_info : NULL,
1983 vdso ? &vdso_info : NULL);
1984 info->start_stack = bprm->p;
1985
1986 /* If we have an interpreter, set that as the program's entry point.
1987 Copy the load_bias as well, to help PPC64 interpret the entry
1988 point as a function descriptor. Do this after creating elf tables
1989 so that we copy the original program entry point into the AUXV. */
1990 if (elf_interpreter) {
1991 info->load_bias = interp_info.load_bias;
1992 info->entry = interp_info.entry;
1993 g_free(elf_interpreter);
1994 }
1995
1996 #ifdef HAVE_ELF_CORE_DUMP
1997 bprm->core_dump = &elf_core_dump;
1998 #endif
1999
2000 return 0;
2001 }
2002
2003 #ifdef HAVE_ELF_CORE_DUMP
2004
2005 /*
2006 * Definitions to generate Intel SVR4-like core files.
2007 * These mostly have the same names as the SVR4 types with "target_elf_"
2008 * tacked on the front to prevent clashes with linux definitions,
2009 * and the typedef forms have been avoided. This is mostly like
2010 * the SVR4 structure, but more Linuxy, with things that Linux does
2011 * not support and which gdb doesn't really use excluded.
2012 *
2013 * Fields we don't dump (their contents are zero) in linux-user qemu
2014 * are marked with XXX.
2015 *
2016 * Core dump code is copied from linux kernel (fs/binfmt_elf.c).
2017 *
2018 * Porting ELF coredump to a target is a (quite) simple process. First you
2019 * define HAVE_ELF_CORE_DUMP in the target ELF code (where init_thread()
2020 * for the target resides):
2021 *
2022 * #define HAVE_ELF_CORE_DUMP
2023 *
2024 * Next you define the type of the register set used for dumping:
2025 * typedef struct target_elf_gregset_t { ... } target_elf_gregset_t;
2026 *
2027 * The last step is to implement a target-specific function that copies the
2028 * registers from the given cpu into the register set just specified. Prototype:
2029 *
2030 * void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUArchState *env);
2031 *
2032 * Parameters:
2033 * regs - copy register values into here (allocated and zeroed by caller)
2034 * env - copy registers from here
2035 *
2036 * An example for the ARM target is provided in this file.
2037 */
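/*
 * A minimal sketch of the three steps above for a hypothetical target.
 * The register names and layout below are purely illustrative (env->gpr
 * and env->pc are not real fields of any particular CPUArchState):
 *
 *     #define HAVE_ELF_CORE_DUMP
 *
 *     typedef struct target_elf_gregset_t {
 *         abi_ulong gregs[32];   // hypothetical general-purpose registers
 *         abi_ulong pc;          // hypothetical program counter
 *     } target_elf_gregset_t;
 *
 *     void elf_core_copy_regs(target_elf_gregset_t *regs,
 *                             const CPUArchState *env)
 *     {
 *         for (int i = 0; i < 32; i++) {
 *             regs->gregs[i] = tswapal(env->gpr[i]);  // swap to target order
 *         }
 *         regs->pc = tswapal(env->pc);
 *     }
 */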
2038
2039 struct target_elf_siginfo {
2040 abi_int si_signo; /* signal number */
2041 abi_int si_code; /* extra code */
2042 abi_int si_errno; /* errno */
2043 };
2044
2045 struct target_elf_prstatus {
2046 struct target_elf_siginfo pr_info; /* Info associated with signal */
2047 abi_short pr_cursig; /* Current signal */
2048 abi_ulong pr_sigpend; /* XXX */
2049 abi_ulong pr_sighold; /* XXX */
2050 target_pid_t pr_pid;
2051 target_pid_t pr_ppid;
2052 target_pid_t pr_pgrp;
2053 target_pid_t pr_sid;
2054 struct target_timeval pr_utime; /* XXX User time */
2055 struct target_timeval pr_stime; /* XXX System time */
2056 struct target_timeval pr_cutime; /* XXX Cumulative user time */
2057 struct target_timeval pr_cstime; /* XXX Cumulative system time */
2058 target_elf_gregset_t pr_reg; /* GP registers */
2059 abi_int pr_fpvalid; /* XXX */
2060 };
2061
2062 #define ELF_PRARGSZ (80) /* Number of chars for args */
2063
2064 struct target_elf_prpsinfo {
2065 char pr_state; /* numeric process state */
2066 char pr_sname; /* char for pr_state */
2067 char pr_zomb; /* zombie */
2068 char pr_nice; /* nice val */
2069 abi_ulong pr_flag; /* flags */
2070 target_uid_t pr_uid;
2071 target_gid_t pr_gid;
2072 target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
2073 /* Lots missing */
2074 char pr_fname[16] QEMU_NONSTRING; /* filename of executable */
2075 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
2076 };
2077
2078 static void bswap_prstatus(struct target_elf_prstatus *prstatus)
2079 {
2080 if (!target_needs_bswap()) {
2081 return;
2082 }
2083
2084 prstatus->pr_info.si_signo = tswap32(prstatus->pr_info.si_signo);
2085 prstatus->pr_info.si_code = tswap32(prstatus->pr_info.si_code);
2086 prstatus->pr_info.si_errno = tswap32(prstatus->pr_info.si_errno);
2087 prstatus->pr_cursig = tswap16(prstatus->pr_cursig);
2088 prstatus->pr_sigpend = tswapal(prstatus->pr_sigpend);
2089 prstatus->pr_sighold = tswapal(prstatus->pr_sighold);
2090 prstatus->pr_pid = tswap32(prstatus->pr_pid);
2091 prstatus->pr_ppid = tswap32(prstatus->pr_ppid);
2092 prstatus->pr_pgrp = tswap32(prstatus->pr_pgrp);
2093 prstatus->pr_sid = tswap32(prstatus->pr_sid);
2094 /* cpu times are not filled, so we skip them */
2095 /* regs should be in correct format already */
2096 prstatus->pr_fpvalid = tswap32(prstatus->pr_fpvalid);
2097 }
2098
2099 static void bswap_psinfo(struct target_elf_prpsinfo *psinfo)
2100 {
2101 if (!target_needs_bswap()) {
2102 return;
2103 }
2104
2105 psinfo->pr_flag = tswapal(psinfo->pr_flag);
2106 psinfo->pr_uid = tswap16(psinfo->pr_uid);
2107 psinfo->pr_gid = tswap16(psinfo->pr_gid);
2108 psinfo->pr_pid = tswap32(psinfo->pr_pid);
2109 psinfo->pr_ppid = tswap32(psinfo->pr_ppid);
2110 psinfo->pr_pgrp = tswap32(psinfo->pr_pgrp);
2111 psinfo->pr_sid = tswap32(psinfo->pr_sid);
2112 }
2113
2114 static void bswap_note(struct elf_note *en)
2115 {
2116 if (!target_needs_bswap()) {
2117 return;
2118 }
2119
2120 bswap32s(&en->n_namesz);
2121 bswap32s(&en->n_descsz);
2122 bswap32s(&en->n_type);
2123 }
2124
2125 /*
2126 * Calculate file (dump) size of given memory region.
2127 */
2128 static size_t vma_dump_size(vaddr start, vaddr end, int flags)
2129 {
2130 /* The area must be readable and dumpable. */
2131 if (!(flags & PAGE_READ) || (flags & PAGE_DONTDUMP)) {
2132 return 0;
2133 }
2134
2135 /*
2136 * Usually we don't dump executable pages, as they contain
2137 * non-writable code that the debugger can read directly from the
2138 * target library etc. If there is no ELF header, we dump the region.
2139 */
2140 if (!(flags & PAGE_WRITE_ORG) &&
2141 (flags & PAGE_EXEC) &&
2142 memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) {
2143 return 0;
2144 }
2145
2146 return end - start;
2147 }
2148
2149 static size_t size_note(const char *name, size_t datasz)
2150 {
2151 size_t namesz = strlen(name) + 1;
2152
2153 namesz = ROUND_UP(namesz, 4);
2154 datasz = ROUND_UP(datasz, 4);
2155
2156 return sizeof(struct elf_note) + namesz + datasz;
2157 }
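/*
 * A worked example of the layout computed above (values illustrative):
 * size_note("CORE", 16) is sizeof(struct elf_note) (12 bytes for the
 * three 32-bit header fields) + 8 bytes for "CORE\0" rounded up to a
 * multiple of 4 + 16 bytes of descriptor data = 36 bytes.
 * fill_note() below lays out the record the same way and returns a
 * pointer to the descriptor area for the caller to fill in.
 */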
2158
2159 static void *fill_note(void **pptr, int type, const char *name, size_t datasz)
2160 {
2161 void *ptr = *pptr;
2162 struct elf_note *n = ptr;
2163 size_t namesz = strlen(name) + 1;
2164
2165 n->n_namesz = namesz;
2166 n->n_descsz = datasz;
2167 n->n_type = type;
2168 bswap_note(n);
2169
2170 ptr += sizeof(*n);
2171 memcpy(ptr, name, namesz);
2172
2173 namesz = ROUND_UP(namesz, 4);
2174 datasz = ROUND_UP(datasz, 4);
2175
2176 *pptr = ptr + namesz + datasz;
2177 return ptr + namesz;
2178 }
2179
2180 static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine,
2181 uint32_t flags)
2182 {
2183 memcpy(elf->e_ident, ELFMAG, SELFMAG);
2184
2185 elf->e_ident[EI_CLASS] = ELF_CLASS;
2186 elf->e_ident[EI_DATA] = ELF_DATA;
2187 elf->e_ident[EI_VERSION] = EV_CURRENT;
2188 elf->e_ident[EI_OSABI] = ELF_OSABI;
2189
2190 elf->e_type = ET_CORE;
2191 elf->e_machine = machine;
2192 elf->e_version = EV_CURRENT;
2193 elf->e_phoff = sizeof(struct elfhdr);
2194 elf->e_flags = flags;
2195 elf->e_ehsize = sizeof(struct elfhdr);
2196 elf->e_phentsize = sizeof(struct elf_phdr);
2197 elf->e_phnum = segs;
2198
2199 bswap_ehdr(elf);
2200 }
2201
2202 static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset)
2203 {
2204 phdr->p_type = PT_NOTE;
2205 phdr->p_offset = offset;
2206 phdr->p_filesz = sz;
2207
2208 bswap_phdr(phdr, 1);
2209 }
2210
2211 static void fill_prstatus_note(void *data, CPUState *cpu, int signr)
2212 {
2213 /*
2214 * Because note memory is only aligned to 4, and target_elf_prstatus
2215 * may well have higher alignment requirements, fill locally and
2216 * memcpy to the destination afterward.
2217 */
2218 struct target_elf_prstatus prstatus = {
2219 .pr_info.si_signo = signr,
2220 .pr_cursig = signr,
2221 .pr_pid = get_task_state(cpu)->ts_tid,
2222 .pr_ppid = getppid(),
2223 .pr_pgrp = getpgrp(),
2224 .pr_sid = getsid(0),
2225 };
2226
2227 elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu));
2228 bswap_prstatus(&prstatus);
2229 memcpy(data, &prstatus, sizeof(prstatus));
2230 }
2231
2232 static void fill_prpsinfo_note(void *data, const TaskState *ts)
2233 {
2234 /*
2235 * Because note memory is only aligned to 4, and target_elf_prpsinfo
2236 * may well have higher alignment requirements, fill locally and
2237 * memcpy to the destination afterward.
2238 */
2239 struct target_elf_prpsinfo psinfo = {
2240 .pr_pid = getpid(),
2241 .pr_ppid = getppid(),
2242 .pr_pgrp = getpgrp(),
2243 .pr_sid = getsid(0),
2244 .pr_uid = getuid(),
2245 .pr_gid = getgid(),
2246 };
2247 char *base_filename;
2248 size_t len;
2249
2250 len = ts->info->env_strings - ts->info->arg_strings;
2251 len = MIN(len, ELF_PRARGSZ);
2252 memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len);
2253 for (size_t i = 0; i < len; i++) {
2254 if (psinfo.pr_psargs[i] == 0) {
2255 psinfo.pr_psargs[i] = ' ';
2256 }
2257 }
2258
2259 base_filename = g_path_get_basename(ts->bprm->filename);
2260 /*
2261 * Using strncpy here is fine: at max-length,
2262 * this field is not NUL-terminated.
2263 */
2264 strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname));
2265 g_free(base_filename);
2266
2267 bswap_psinfo(&psinfo);
2268 memcpy(data, &psinfo, sizeof(psinfo));
2269 }
2270
2271 static void fill_auxv_note(void *data, const TaskState *ts)
2272 {
2273 memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len);
2274 }
2275
2276 /*
2277 * Constructs the name of the coredump file. We use the following
2278 * convention for the name:
2279 * qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
2280 *
2281 * Returns the filename
2282 */
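/*
 * For example (values are illustrative), dumping a process running "ls"
 * with pid 4242 might produce "qemu_ls_20240512-143015_4242.core".
 */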
2283 static char *core_dump_filename(const TaskState *ts)
2284 {
2285 g_autoptr(GDateTime) now = g_date_time_new_now_local();
2286 g_autofree char *nowstr = g_date_time_format(now, "%Y%m%d-%H%M%S");
2287 g_autofree char *base_filename = g_path_get_basename(ts->bprm->filename);
2288
2289 return g_strdup_printf("qemu_%s_%s_%d.core",
2290 base_filename, nowstr, (int)getpid());
2291 }
2292
2293 static int dump_write(int fd, const void *ptr, size_t size)
2294 {
2295 const char *bufp = (const char *)ptr;
2296 ssize_t bytes_written, bytes_left;
2297
2298 bytes_written = 0;
2299 bytes_left = size;
2300
2301 /*
2302 * Under normal conditions a single write(2) should do, but in
2303 * the case of a socket etc. this retry loop is more portable.
2304 */
2305 do {
2306 bytes_written = write(fd, bufp, bytes_left);
2307 if (bytes_written < 0) {
2308 if (errno == EINTR)
2309 continue;
2310 return (-1);
2311 } else if (bytes_written == 0) { /* eof */
2312 return (-1);
2313 }
2314 bufp += bytes_written;
2315 bytes_left -= bytes_written;
2316 } while (bytes_left > 0);
2317
2318 return (0);
2319 }
2320
2321 static int wmr_page_unprotect_regions(void *opaque, vaddr start,
2322 vaddr end, int flags)
2323 {
2324 if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) {
2325 size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size());
2326
2327 while (1) {
2328 page_unprotect(NULL, start, 0);
2329 if (end - start <= step) {
2330 break;
2331 }
2332 start += step;
2333 }
2334 }
2335 return 0;
2336 }
2337
2338 typedef struct {
2339 unsigned count;
2340 size_t size;
2341 } CountAndSizeRegions;
2342
2343 static int wmr_count_and_size_regions(void *opaque, vaddr start,
2344 vaddr end, int flags)
2345 {
2346 CountAndSizeRegions *css = opaque;
2347
2348 css->count++;
2349 css->size += vma_dump_size(start, end, flags);
2350 return 0;
2351 }
2352
2353 typedef struct {
2354 struct elf_phdr *phdr;
2355 off_t offset;
2356 } FillRegionPhdr;
2357
2358 static int wmr_fill_region_phdr(void *opaque, vaddr start,
2359 vaddr end, int flags)
2360 {
2361 FillRegionPhdr *d = opaque;
2362 struct elf_phdr *phdr = d->phdr;
2363
2364 phdr->p_type = PT_LOAD;
2365 phdr->p_vaddr = start;
2366 phdr->p_paddr = 0;
2367 phdr->p_filesz = vma_dump_size(start, end, flags);
2368 phdr->p_offset = d->offset;
2369 d->offset += phdr->p_filesz;
2370 phdr->p_memsz = end - start;
2371 phdr->p_flags = (flags & PAGE_READ ? PF_R : 0)
2372 | (flags & PAGE_WRITE_ORG ? PF_W : 0)
2373 | (flags & PAGE_EXEC ? PF_X : 0);
2374 phdr->p_align = TARGET_PAGE_SIZE;
2375
2376 bswap_phdr(phdr, 1);
2377 d->phdr = phdr + 1;
2378 return 0;
2379 }
2380
2381 static int wmr_write_region(void *opaque, vaddr start,
2382 vaddr end, int flags)
2383 {
2384 int fd = *(int *)opaque;
2385 size_t size = vma_dump_size(start, end, flags);
2386
2387 if (!size) {
2388 return 0;
2389 }
2390 return dump_write(fd, g2h_untagged(start), size);
2391 }
2392
2393 /*
2394 * Write out ELF coredump.
2395 *
2396 * See documentation of ELF object file format in:
2397 * http://www.caldera.com/developers/devspecs/gabi41.pdf
2398 *
2399 * The coredump format in Linux is as follows:
2400 *
2401 * 0 +----------------------+ \
2402 * | ELF header | ET_CORE |
2403 * +----------------------+ |
2404 * | ELF program headers | |--- headers
2405 * | - NOTE section | |
2406 * | - PT_LOAD sections | |
2407 * +----------------------+ /
2408 * | NOTEs: |
2409 * | - NT_PRSTATUS |
2410 * | - NT_PRSINFO |
2411 * | - NT_AUXV |
2412 * +----------------------+ <-- aligned to target page
2413 * | Process memory dump |
2414 * : :
2415 * . .
2416 * : :
2417 * | |
2418 * +----------------------+
2419 *
2420 * NT_PRSTATUS -> struct elf_prstatus (per thread)
2421 * NT_PRSINFO -> struct elf_prpsinfo
2422 * NT_AUXV is an array of { type, value } pairs (see fill_auxv_note()).
2423 *
2424 * The format follows the System V format as closely as possible.
2425 * Current version limitations are as follows:
2426 * - no floating point registers are dumped
2427 *
2428 * Function returns 0 in case of success, negative errno otherwise.
2429 *
2430 * TODO: make this also work at runtime: it should be possible
2431 * to force a coredump from a running process and then continue
2432 * execution. For example, qemu could install a SIGUSR2 handler
2433 * (provided the target process hasn't registered a handler for it)
2434 * that performs the dump when the signal is received.
2435 */
2436 static int elf_core_dump(int signr, const CPUArchState *env)
2437 {
2438 const CPUState *cpu = env_cpu_const(env);
2439 const TaskState *ts = (const TaskState *)get_task_state((CPUState *)cpu);
2440 struct rlimit dumpsize;
2441 CountAndSizeRegions css;
2442 off_t offset, note_offset, data_offset;
2443 size_t note_size;
2444 int cpus, ret;
2445 int fd = -1;
2446 CPUState *cpu_iter;
2447
2448 if (prctl(PR_GET_DUMPABLE) == 0) {
2449 return 0;
2450 }
2451
2452 if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) {
2453 return 0;
2454 }
2455
2456 cpu_list_lock();
2457 mmap_lock();
2458
2459 /* By unprotecting, we merge vmas that might be split. */
2460 walk_memory_regions(NULL, wmr_page_unprotect_regions);
2461
2462 /*
2463 * Walk through the target process's memory mappings and
2464 * set up a structure containing this information.
2465 */
2466 memset(&css, 0, sizeof(css));
2467 walk_memory_regions(&css, wmr_count_and_size_regions);
2468
2469 cpus = 0;
2470 CPU_FOREACH(cpu_iter) {
2471 cpus++;
2472 }
2473
2474 offset = sizeof(struct elfhdr);
2475 offset += (css.count + 1) * sizeof(struct elf_phdr);
2476 note_offset = offset;
2477
2478 offset += size_note("CORE", ts->info->auxv_len);
2479 offset += size_note("CORE", sizeof(struct target_elf_prpsinfo));
2480 offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus;
2481 note_size = offset - note_offset;
2482 data_offset = TARGET_PAGE_ALIGN(offset);
2483
2484 /* Do not dump if the corefile size exceeds the limit. */
2485 if (dumpsize.rlim_cur != RLIM_INFINITY
2486 && dumpsize.rlim_cur < data_offset + css.size) {
2487 errno = 0;
2488 goto out;
2489 }
2490
2491 {
2492 g_autofree char *corefile = core_dump_filename(ts);
2493 fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC,
2494 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
2495 }
2496 if (fd < 0) {
2497 goto out;
2498 }
2499
2500 /*
2501 * There is a fair amount of alignment padding within the notes
2502 * as well as preceding the process memory. Allocate a zeroed
2503 * block to hold it all. Write all of the headers directly into
2504 * this buffer and then write it out as a block.
2505 */
2506 {
2507 g_autofree void *header = g_malloc0(data_offset);
2508 FillRegionPhdr frp;
2509 void *hptr, *dptr;
2510
2511 /* Create elf file header. */
2512 hptr = header;
2513 fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0);
2514 hptr += sizeof(struct elfhdr);
2515
2516 /* Create elf program headers. */
2517 fill_elf_note_phdr(hptr, note_size, note_offset);
2518 hptr += sizeof(struct elf_phdr);
2519
2520 frp.phdr = hptr;
2521 frp.offset = data_offset;
2522 walk_memory_regions(&frp, wmr_fill_region_phdr);
2523 hptr = frp.phdr;
2524
2525 /* Create the notes. */
2526 dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len);
2527 fill_auxv_note(dptr, ts);
2528
2529 dptr = fill_note(&hptr, NT_PRPSINFO, "CORE",
2530 sizeof(struct target_elf_prpsinfo));
2531 fill_prpsinfo_note(dptr, ts);
2532
2533 CPU_FOREACH(cpu_iter) {
2534 dptr = fill_note(&hptr, NT_PRSTATUS, "CORE",
2535 sizeof(struct target_elf_prstatus));
2536 fill_prstatus_note(dptr, cpu_iter, cpu_iter == cpu ? signr : 0);
2537 }
2538
2539 if (dump_write(fd, header, data_offset) < 0) {
2540 goto out;
2541 }
2542 }
2543
2544 /*
2545 * Finally write process memory into the corefile as well.
2546 */
2547 if (walk_memory_regions(&fd, wmr_write_region) < 0) {
2548 goto out;
2549 }
2550 errno = 0;
2551
2552 out:
2553 ret = -errno;
2554 mmap_unlock();
2555 cpu_list_unlock();
2556 if (fd >= 0) {
2557 close(fd);
2558 }
2559 return ret;
2560 }
2561 #endif /* HAVE_ELF_CORE_DUMP */
2562