/* This is the Linux kernel elf-loading code, ported into user space */
#include "qemu/osdep.h"
#include <sys/param.h>

#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/shm.h>

#include "qemu.h"
#include "user/tswap-target.h"
#include "user/page-protection.h"
#include "exec/page-protection.h"
#include "exec/mmap-lock.h"
#include "exec/translation-block.h"
#include "exec/tswap.h"
#include "user/guest-base.h"
#include "user-internals.h"
#include "signal-common.h"
#include "loader.h"
#include "user-mmap.h"
#include "disas/disas.h"
#include "qemu/bitops.h"
#include "qemu/path.h"
#include "qemu/queue.h"
#include "qemu/guest-random.h"
#include "qemu/units.h"
#include "qemu/selfmap.h"
#include "qemu/lockable.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "target_elf.h"
#include "target_signal.h"
#include "tcg/debuginfo.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

#ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE
#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
#endif

typedef struct {
    const uint8_t *image;
    const uint32_t *relocs;
    unsigned image_size;
    unsigned reloc_count;
    unsigned sigreturn_ofs;
    unsigned rt_sigreturn_ofs;
} VdsoImageInfo;

#define ELF_OSABI   ELFOSABI_SYSV

/* from personality.h */

/*
 * Flags for bug emulation.
 *
 * These occupy the top three bytes.
 */
enum {
    ADDR_NO_RANDOMIZE = 0x0040000,      /* disable randomization of VA space */
    FDPIC_FUNCPTRS =    0x0080000,      /* userspace function ptrs point to
                                           descriptors (signal handling) */
    MMAP_PAGE_ZERO =    0x0100000,
    ADDR_COMPAT_LAYOUT = 0x0200000,
    READ_IMPLIES_EXEC = 0x0400000,
    ADDR_LIMIT_32BIT =  0x0800000,
    SHORT_INODE =       0x1000000,
    WHOLE_SECONDS =     0x2000000,
    STICKY_TIMEOUTS =   0x4000000,
    ADDR_LIMIT_3GB =    0x8000000,
};

/*
 * Personality types.
 *
 * These go in the low byte.  Avoid using the top bit, it will
 * conflict with error returns.
 */
enum {
    PER_LINUX =         0x0000,
    PER_LINUX_32BIT =   0x0000 | ADDR_LIMIT_32BIT,
    PER_LINUX_FDPIC =   0x0000 | FDPIC_FUNCPTRS,
    PER_SVR4 =          0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
    PER_SVR3 =          0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
    PER_SCOSVR3 =       0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE,
    PER_OSR5 =          0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
    PER_WYSEV386 =      0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
    PER_ISCR4 =         0x0005 | STICKY_TIMEOUTS,
    PER_BSD =           0x0006,
    PER_SUNOS =         0x0006 | STICKY_TIMEOUTS,
    PER_XENIX =         0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
    PER_LINUX32 =       0x0008,
    PER_LINUX32_3GB =   0x0008 | ADDR_LIMIT_3GB,
    PER_IRIX32 =        0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
    PER_IRIXN32 =       0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
    PER_IRIX64 =        0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
    PER_RISCOS =        0x000c,
    PER_SOLARIS =       0x000d | STICKY_TIMEOUTS,
    PER_UW7 =           0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
    PER_OSF4 =          0x000f,                  /* OSF/1 v4 */
    PER_HPUX =          0x0010,
    PER_MASK =          0x00ff,
};

/*
 * Return the base personality without flags.
 */
#define personality(pers)       (pers & PER_MASK)
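/*
 * For example, personality(PER_LINUX32_3GB) yields PER_LINUX32 (0x0008):
 * the ADDR_LIMIT_3GB bug-emulation flag lives in the high bytes and is
 * masked off by PER_MASK.
 */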

int info_is_fdpic(struct image_info *info)
{
    return info->personality == PER_LINUX_FDPIC;
}

/* This flag is ineffective under Linux too; it should be deleted. */
#ifndef MAP_DENYWRITE
#define MAP_DENYWRITE 0
#endif

/* should probably go in elf.h */
#ifndef ELIBBAD
#define ELIBBAD 80
#endif

#if TARGET_BIG_ENDIAN
#define ELF_DATA        ELFDATA2MSB
#else
#define ELF_DATA        ELFDATA2LSB
#endif

#ifdef TARGET_ABI_MIPSN32
typedef abi_ullong      target_elf_greg_t;
#define tswapreg(ptr)   tswap64(ptr)
#else
typedef abi_ulong       target_elf_greg_t;
#define tswapreg(ptr)   tswapal(ptr)
#endif

#ifdef USE_UID16
typedef abi_ushort      target_uid_t;
typedef abi_ushort      target_gid_t;
#else
typedef abi_uint        target_uid_t;
typedef abi_uint        target_gid_t;
#endif
typedef abi_int         target_pid_t;

#ifdef TARGET_I386

#define HAVE_INIT_MAIN_THREAD

#ifdef TARGET_X86_64
#define ELF_CLASS      ELFCLASS64
#define ELF_ARCH       EM_X86_64

#define ELF_NREG    27
typedef target_elf_greg_t  target_elf_gregset_t[ELF_NREG];

/*
 * Note that ELF_NREG should be 29, as there should be room for the
 * TRAPNO and ERR "registers" as well, but Linux doesn't dump those.
 *
 * See linux kernel: arch/x86/include/asm/elf.h
 */
static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env)
{
    (*regs)[0] = tswapreg(env->regs[15]);
    (*regs)[1] = tswapreg(env->regs[14]);
    (*regs)[2] = tswapreg(env->regs[13]);
    (*regs)[3] = tswapreg(env->regs[12]);
    (*regs)[4] = tswapreg(env->regs[R_EBP]);
    (*regs)[5] = tswapreg(env->regs[R_EBX]);
    (*regs)[6] = tswapreg(env->regs[11]);
    (*regs)[7] = tswapreg(env->regs[10]);
    (*regs)[8] = tswapreg(env->regs[9]);
    (*regs)[9] = tswapreg(env->regs[8]);
    (*regs)[10] = tswapreg(env->regs[R_EAX]);
    (*regs)[11] = tswapreg(env->regs[R_ECX]);
    (*regs)[12] = tswapreg(env->regs[R_EDX]);
    (*regs)[13] = tswapreg(env->regs[R_ESI]);
    (*regs)[14] = tswapreg(env->regs[R_EDI]);
    (*regs)[15] = tswapreg(get_task_state(env_cpu_const(env))->orig_ax);
    (*regs)[16] = tswapreg(env->eip);
    (*regs)[17] = tswapreg(env->segs[R_CS].selector & 0xffff);
    (*regs)[18] = tswapreg(env->eflags);
    (*regs)[19] = tswapreg(env->regs[R_ESP]);
    (*regs)[20] = tswapreg(env->segs[R_SS].selector & 0xffff);
    (*regs)[21] = tswapreg(env->segs[R_FS].selector & 0xffff);
    (*regs)[22] = tswapreg(env->segs[R_GS].selector & 0xffff);
    (*regs)[23] = tswapreg(env->segs[R_DS].selector & 0xffff);
    (*regs)[24] = tswapreg(env->segs[R_ES].selector & 0xffff);
    (*regs)[25] = tswapreg(env->segs[R_FS].selector & 0xffff);
    (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff);
}

#if ULONG_MAX > UINT32_MAX
#define INIT_GUEST_COMMPAGE
static bool init_guest_commpage(void)
{
    /*
     * The vsyscall page is at a high negative address aka kernel space,
     * which means that we cannot actually allocate it with target_mmap.
     * We still should be able to use page_set_flags, unless the user
     * has specified -R reserved_va, which would trigger an assert().
     */
    if (reserved_va != 0 &&
        TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE - 1 > reserved_va) {
        error_report("Cannot allocate vsyscall page");
        exit(EXIT_FAILURE);
    }
    page_set_flags(TARGET_VSYSCALL_PAGE,
                   TARGET_VSYSCALL_PAGE | ~TARGET_PAGE_MASK,
                   PAGE_EXEC | PAGE_VALID);
    return true;
}
#endif
#else

/*
 * This is used to ensure we don't load something for the wrong architecture.
 */
#define elf_check_arch(x) ( ((x) == EM_386) || ((x) == EM_486) )

/*
 * These are used to set parameters in the core dumps.
 */
#define ELF_CLASS       ELFCLASS32
#define ELF_ARCH        EM_386

#define EXSTACK_DEFAULT true

#define ELF_NREG    17
typedef target_elf_greg_t  target_elf_gregset_t[ELF_NREG];

/*
 * Note that ELF_NREG should be 19, as there should be room for the
 * TRAPNO and ERR "registers" as well, but Linux doesn't dump those.
 *
 * See linux kernel: arch/x86/include/asm/elf.h
 */
static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env)
{
    (*regs)[0] = tswapreg(env->regs[R_EBX]);
    (*regs)[1] = tswapreg(env->regs[R_ECX]);
    (*regs)[2] = tswapreg(env->regs[R_EDX]);
    (*regs)[3] = tswapreg(env->regs[R_ESI]);
    (*regs)[4] = tswapreg(env->regs[R_EDI]);
    (*regs)[5] = tswapreg(env->regs[R_EBP]);
    (*regs)[6] = tswapreg(env->regs[R_EAX]);
    (*regs)[7] = tswapreg(env->segs[R_DS].selector & 0xffff);
    (*regs)[8] = tswapreg(env->segs[R_ES].selector & 0xffff);
    (*regs)[9] = tswapreg(env->segs[R_FS].selector & 0xffff);
    (*regs)[10] = tswapreg(env->segs[R_GS].selector & 0xffff);
    (*regs)[11] = tswapreg(get_task_state(env_cpu_const(env))->orig_ax);
    (*regs)[12] = tswapreg(env->eip);
    (*regs)[13] = tswapreg(env->segs[R_CS].selector & 0xffff);
    (*regs)[14] = tswapreg(env->eflags);
    (*regs)[15] = tswapreg(env->regs[R_ESP]);
    (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff);
}

/*
 * i386 is the only target which supplies AT_SYSINFO for the vdso.
 * All others only supply AT_SYSINFO_EHDR.
 */
#define DLINFO_ARCH_ITEMS (vdso_info != NULL)
#define ARCH_DLINFO                                     \
    do {                                                \
        if (vdso_info) {                                \
            NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry);  \
        }                                               \
    } while (0)
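
/*
 * Descriptive note: AT_SYSINFO carries the address of the vdso's
 * syscall entry point (__kernel_vsyscall), which i386 libc uses for
 * fast system calls; AT_SYSINFO_EHDR, set in create_elf_tables(),
 * instead points at the vdso's ELF header.
 */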

#endif /* TARGET_X86_64 */

#define VDSO_HEADER "vdso.c.inc"

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       4096

#endif /* TARGET_I386 */

#ifdef TARGET_ARM

#ifndef TARGET_AARCH64
/* 32 bit ARM definitions */

#define ELF_ARCH        EM_ARM
#define ELF_CLASS       ELFCLASS32
#define EXSTACK_DEFAULT true

#define HAVE_INIT_MAIN_THREAD

#define ELF_NREG    18
typedef target_elf_greg_t  target_elf_gregset_t[ELF_NREG];

static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUARMState *env)
{
    (*regs)[0] = tswapreg(env->regs[0]);
    (*regs)[1] = tswapreg(env->regs[1]);
    (*regs)[2] = tswapreg(env->regs[2]);
    (*regs)[3] = tswapreg(env->regs[3]);
    (*regs)[4] = tswapreg(env->regs[4]);
    (*regs)[5] = tswapreg(env->regs[5]);
    (*regs)[6] = tswapreg(env->regs[6]);
    (*regs)[7] = tswapreg(env->regs[7]);
    (*regs)[8] = tswapreg(env->regs[8]);
    (*regs)[9] = tswapreg(env->regs[9]);
    (*regs)[10] = tswapreg(env->regs[10]);
    (*regs)[11] = tswapreg(env->regs[11]);
    (*regs)[12] = tswapreg(env->regs[12]);
    (*regs)[13] = tswapreg(env->regs[13]);
    (*regs)[14] = tswapreg(env->regs[14]);
    (*regs)[15] = tswapreg(env->regs[15]);

    (*regs)[16] = tswapreg(cpsr_read((CPUARMState *)env));
    (*regs)[17] = tswapreg(env->regs[0]); /* XXX */
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       4096

/* The commpage only exists for 32 bit kernels */

#define HI_COMMPAGE (intptr_t)0xffff0f00u

static bool init_guest_commpage(void)
{
    ARMCPU *cpu = ARM_CPU(thread_cpu);
    int host_page_size = qemu_real_host_page_size();
    abi_ptr commpage;
    void *want;
    void *addr;

    /*
     * M-profile allocates maximum of 2GB address space, so can never
     * allocate the commpage.  Skip it.
     */
    if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
        return true;
    }

    commpage = HI_COMMPAGE & -host_page_size;
    want = g2h_untagged(commpage);
    addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE,
                MAP_ANONYMOUS | MAP_PRIVATE |
                (commpage < reserved_va ? MAP_FIXED : MAP_FIXED_NOREPLACE),
                -1, 0);

    if (addr == MAP_FAILED) {
        perror("Allocating guest commpage");
        exit(EXIT_FAILURE);
    }
    if (addr != want) {
        return false;
    }

    /* Set kernel helper versions; rest of page is 0.  */
    __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu));

    if (mprotect(addr, host_page_size, PROT_READ)) {
        perror("Protecting guest commpage");
        exit(EXIT_FAILURE);
    }

    page_set_flags(commpage, commpage | (host_page_size - 1),
                   PAGE_READ | PAGE_EXEC | PAGE_VALID);
    return true;
}
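
/*
 * Illustrative note: the word stored at 0xffff0ffc above is the kuser
 * helper version.  Guest code reaches helpers such as __kernel_cmpxchg
 * (0xffff0fc0) and __kernel_get_tls (0xffff0fe0) by calling into this
 * page; QEMU recognizes those addresses at translation time instead of
 * executing real code from the page.
 */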

#if TARGET_BIG_ENDIAN
#include "elf.h"
#include "vdso-be8.c.inc"
#include "vdso-be32.c.inc"

static const VdsoImageInfo *vdso_image_info(uint32_t elf_flags)
{
    return (EF_ARM_EABI_VERSION(elf_flags) >= EF_ARM_EABI_VER4
            && (elf_flags & EF_ARM_BE8)
            ? &vdso_be8_image_info
            : &vdso_be32_image_info);
}
#define vdso_image_info vdso_image_info
#else
# define VDSO_HEADER  "vdso-le.c.inc"
#endif

#else
/* 64 bit ARM definitions */

#define ELF_ARCH        EM_AARCH64
#define ELF_CLASS       ELFCLASS64

#define HAVE_INIT_MAIN_THREAD

#define ELF_NREG    34
typedef target_elf_greg_t  target_elf_gregset_t[ELF_NREG];

static void elf_core_copy_regs(target_elf_gregset_t *regs,
                               const CPUARMState *env)
{
    int i;

    for (i = 0; i < 32; i++) {
        (*regs)[i] = tswapreg(env->xregs[i]);
    }
    (*regs)[32] = tswapreg(env->pc);
    (*regs)[33] = tswapreg(pstate_read((CPUARMState *)env));
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       4096

#if TARGET_BIG_ENDIAN
# define VDSO_HEADER  "vdso-be.c.inc"
#else
# define VDSO_HEADER  "vdso-le.c.inc"
#endif

#endif /* not TARGET_AARCH64 */

#endif /* TARGET_ARM */

#ifdef TARGET_SPARC

#ifndef TARGET_SPARC64
# define ELF_CLASS  ELFCLASS32
# define ELF_ARCH   EM_SPARC
#elif defined(TARGET_ABI32)
# define ELF_CLASS  ELFCLASS32
# define elf_check_arch(x) ((x) == EM_SPARC32PLUS || (x) == EM_SPARC)
#else
# define ELF_CLASS  ELFCLASS64
# define ELF_ARCH   EM_SPARCV9
#endif

#define HAVE_INIT_MAIN_THREAD

#endif /* TARGET_SPARC */

#ifdef TARGET_PPC

#define ELF_MACHINE    PPC_ELF_MACHINE

#if defined(TARGET_PPC64)

#define elf_check_arch(x) ( (x) == EM_PPC64 )

#define ELF_CLASS       ELFCLASS64

#else

#define ELF_CLASS       ELFCLASS32
#define EXSTACK_DEFAULT true

#endif

#define ELF_ARCH        EM_PPC

/*
 * The requirements here are:
 * - keep the final alignment of sp (sp & 0xf)
 * - make sure the 32-bit value at the first 16 byte aligned position of
 *   AUXV is greater than 16 for glibc compatibility.
 *   AT_IGNOREPPC is used for that.
 * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
 *   even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
 */
#define DLINFO_ARCH_ITEMS       5
#define ARCH_DLINFO                                     \
    do {                                                \
        PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);              \
        /*                                              \
         * Handle glibc compatibility: these magic entries must \
         * be at the lowest addresses in the final auxv.        \
         */                                             \
        NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);        \
        NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);        \
        NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \
        NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \
        NEW_AUX_ENT(AT_UCACHEBSIZE, 0);                 \
    } while (0)

#define HAVE_INIT_MAIN_THREAD

/* See linux kernel: arch/powerpc/include/asm/elf.h.  */
#define ELF_NREG 48
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *env)
{
    int i;
    target_ulong ccr = 0;

    for (i = 0; i < ARRAY_SIZE(env->gpr); i++) {
        (*regs)[i] = tswapreg(env->gpr[i]);
    }

    (*regs)[32] = tswapreg(env->nip);
    (*regs)[33] = tswapreg(env->msr);
    (*regs)[35] = tswapreg(env->ctr);
    (*regs)[36] = tswapreg(env->lr);
    (*regs)[37] = tswapreg(cpu_read_xer(env));

    ccr = ppc_get_cr(env);
    (*regs)[38] = tswapreg(ccr);
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       4096

#ifndef TARGET_PPC64
# define VDSO_HEADER  "vdso-32.c.inc"
#elif TARGET_BIG_ENDIAN
# define VDSO_HEADER  "vdso-64.c.inc"
#else
# define VDSO_HEADER  "vdso-64le.c.inc"
#endif

#endif

#ifdef TARGET_LOONGARCH64

#define ELF_CLASS   ELFCLASS64
#define ELF_ARCH    EM_LOONGARCH
#define EXSTACK_DEFAULT true

#define elf_check_arch(x) ((x) == EM_LOONGARCH)

#define VDSO_HEADER "vdso.c.inc"

#define HAVE_INIT_MAIN_THREAD

/* See linux kernel: arch/loongarch/include/asm/elf.h */
#define ELF_NREG 45
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

enum {
    TARGET_EF_R0 = 0,
    TARGET_EF_CSR_ERA = TARGET_EF_R0 + 33,
    TARGET_EF_CSR_BADV = TARGET_EF_R0 + 34,
};

static void elf_core_copy_regs(target_elf_gregset_t *regs,
                               const CPULoongArchState *env)
{
    int i;

    (*regs)[TARGET_EF_R0] = 0;

    for (i = 1; i < ARRAY_SIZE(env->gpr); i++) {
        (*regs)[TARGET_EF_R0 + i] = tswapreg(env->gpr[i]);
    }

    (*regs)[TARGET_EF_CSR_ERA] = tswapreg(env->pc);
    (*regs)[TARGET_EF_CSR_BADV] = tswapreg(env->CSR_BADV);
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE        4096

#endif /* TARGET_LOONGARCH64 */

#ifdef TARGET_MIPS

#ifdef TARGET_MIPS64
#define ELF_CLASS   ELFCLASS64
#else
#define ELF_CLASS   ELFCLASS32
#endif
#define ELF_ARCH    EM_MIPS
#define EXSTACK_DEFAULT true

#ifdef TARGET_ABI_MIPSN32
#define elf_check_abi(x) ((x) & EF_MIPS_ABI2)
#else
#define elf_check_abi(x) (!((x) & EF_MIPS_ABI2))
#endif

#define HAVE_INIT_MAIN_THREAD

/* See linux kernel: arch/mips/include/asm/elf.h.  */
#define ELF_NREG 45
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

/* See linux kernel: arch/mips/include/asm/reg.h.  */
enum {
#ifdef TARGET_MIPS64
    TARGET_EF_R0 = 0,
#else
    TARGET_EF_R0 = 6,
#endif
    TARGET_EF_R26 = TARGET_EF_R0 + 26,
    TARGET_EF_R27 = TARGET_EF_R0 + 27,
    TARGET_EF_LO = TARGET_EF_R0 + 32,
    TARGET_EF_HI = TARGET_EF_R0 + 33,
    TARGET_EF_CP0_EPC = TARGET_EF_R0 + 34,
    TARGET_EF_CP0_BADVADDR = TARGET_EF_R0 + 35,
    TARGET_EF_CP0_STATUS = TARGET_EF_R0 + 36,
    TARGET_EF_CP0_CAUSE = TARGET_EF_R0 + 37
};
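
/*
 * On 32-bit MIPS the kernel's elf_gregset_t begins with six pad words
 * (hence TARGET_EF_R0 = 6 above); elf_core_copy_regs() zeroes that pad
 * before filling in the GPRs.
 */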

/* See linux kernel: arch/mips/kernel/process.c:elf_dump_regs.  */
static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMIPSState *env)
{
    int i;

    for (i = 0; i < TARGET_EF_R0; i++) {
        (*regs)[i] = 0;
    }
    (*regs)[TARGET_EF_R0] = 0;

    for (i = 1; i < ARRAY_SIZE(env->active_tc.gpr); i++) {
        (*regs)[TARGET_EF_R0 + i] = tswapreg(env->active_tc.gpr[i]);
    }

    (*regs)[TARGET_EF_R26] = 0;
    (*regs)[TARGET_EF_R27] = 0;
    (*regs)[TARGET_EF_LO] = tswapreg(env->active_tc.LO[0]);
    (*regs)[TARGET_EF_HI] = tswapreg(env->active_tc.HI[0]);
    (*regs)[TARGET_EF_CP0_EPC] = tswapreg(env->active_tc.PC);
    (*regs)[TARGET_EF_CP0_BADVADDR] = tswapreg(env->CP0_BadVAddr);
    (*regs)[TARGET_EF_CP0_STATUS] = tswapreg(env->CP0_Status);
    (*regs)[TARGET_EF_CP0_CAUSE] = tswapreg(env->CP0_Cause);
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE        4096

#endif /* TARGET_MIPS */

#ifdef TARGET_MICROBLAZE

#define elf_check_arch(x) ( (x) == EM_MICROBLAZE || (x) == EM_MICROBLAZE_OLD)

#define ELF_CLASS   ELFCLASS32
#define ELF_ARCH    EM_MICROBLAZE

#define HAVE_INIT_MAIN_THREAD

#define ELF_EXEC_PAGESIZE        4096

#define USE_ELF_CORE_DUMP
#define ELF_NREG 38
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

/* See linux kernel: arch/mips/kernel/process.c:elf_dump_regs.  */
static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMBState *env)
{
    int i, pos = 0;

    for (i = 0; i < 32; i++) {
        (*regs)[pos++] = tswapreg(env->regs[i]);
    }

    (*regs)[pos++] = tswapreg(env->pc);
    (*regs)[pos++] = tswapreg(mb_cpu_read_msr(env));
    (*regs)[pos++] = 0;
    (*regs)[pos++] = tswapreg(env->ear);
    (*regs)[pos++] = 0;
    (*regs)[pos++] = tswapreg(env->esr);
}

#endif /* TARGET_MICROBLAZE */

#ifdef TARGET_OPENRISC

#define ELF_ARCH EM_OPENRISC
#define ELF_CLASS ELFCLASS32
#define ELF_DATA  ELFDATA2MSB

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->pc = infop->entry;
    regs->gpr[1] = infop->start_stack;
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE 8192

/* See linux kernel arch/openrisc/include/asm/elf.h.  */
#define ELF_NREG 34 /* gprs and pc, sr */
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

static void elf_core_copy_regs(target_elf_gregset_t *regs,
                               const CPUOpenRISCState *env)
{
    int i;

    for (i = 0; i < 32; i++) {
        (*regs)[i] = tswapreg(cpu_get_gpr(env, i));
    }
    (*regs)[32] = tswapreg(env->pc);
    (*regs)[33] = tswapreg(cpu_get_sr(env));
}

#endif /* TARGET_OPENRISC */

#ifdef TARGET_SH4

#define ELF_CLASS ELFCLASS32
#define ELF_ARCH  EM_SH

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    /* Check other registers XXXXX */
    regs->pc = infop->entry;
    regs->regs[15] = infop->start_stack;
}

/* See linux kernel: arch/sh/include/asm/elf.h.  */
#define ELF_NREG 23
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

/* See linux kernel: arch/sh/include/asm/ptrace.h.  */
enum {
    TARGET_REG_PC = 16,
    TARGET_REG_PR = 17,
    TARGET_REG_SR = 18,
    TARGET_REG_GBR = 19,
    TARGET_REG_MACH = 20,
    TARGET_REG_MACL = 21,
    TARGET_REG_SYSCALL = 22
};

static inline void elf_core_copy_regs(target_elf_gregset_t *regs,
                                      const CPUSH4State *env)
{
    int i;

    for (i = 0; i < 16; i++) {
        (*regs)[i] = tswapreg(env->gregs[i]);
    }

    (*regs)[TARGET_REG_PC] = tswapreg(env->pc);
    (*regs)[TARGET_REG_PR] = tswapreg(env->pr);
    (*regs)[TARGET_REG_SR] = tswapreg(env->sr);
    (*regs)[TARGET_REG_GBR] = tswapreg(env->gbr);
    (*regs)[TARGET_REG_MACH] = tswapreg(env->mach);
    (*regs)[TARGET_REG_MACL] = tswapreg(env->macl);
    (*regs)[TARGET_REG_SYSCALL] = 0; /* FIXME */
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE        4096

#endif

#ifdef TARGET_M68K

#define ELF_CLASS       ELFCLASS32
#define ELF_ARCH        EM_68K

/* ??? Does this need to do anything?
   #define ELF_PLAT_INIT(_r) */

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->usp = infop->start_stack;
    regs->sr = 0;
    regs->pc = infop->entry;
}

/* See linux kernel: arch/m68k/include/asm/elf.h.  */
#define ELF_NREG 20
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUM68KState *env)
{
    (*regs)[0] = tswapreg(env->dregs[1]);
    (*regs)[1] = tswapreg(env->dregs[2]);
    (*regs)[2] = tswapreg(env->dregs[3]);
    (*regs)[3] = tswapreg(env->dregs[4]);
    (*regs)[4] = tswapreg(env->dregs[5]);
    (*regs)[5] = tswapreg(env->dregs[6]);
    (*regs)[6] = tswapreg(env->dregs[7]);
    (*regs)[7] = tswapreg(env->aregs[0]);
    (*regs)[8] = tswapreg(env->aregs[1]);
    (*regs)[9] = tswapreg(env->aregs[2]);
    (*regs)[10] = tswapreg(env->aregs[3]);
    (*regs)[11] = tswapreg(env->aregs[4]);
    (*regs)[12] = tswapreg(env->aregs[5]);
    (*regs)[13] = tswapreg(env->aregs[6]);
    (*regs)[14] = tswapreg(env->dregs[0]);
    (*regs)[15] = tswapreg(env->aregs[7]);
    (*regs)[16] = tswapreg(env->dregs[0]); /* FIXME: orig_d0 */
    (*regs)[17] = tswapreg(env->sr);
    (*regs)[18] = tswapreg(env->pc);
    (*regs)[19] = 0;  /* FIXME: regs->format | regs->vector */
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       8192

#endif

#ifdef TARGET_ALPHA

#define ELF_CLASS      ELFCLASS64
#define ELF_ARCH       EM_ALPHA

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->pc = infop->entry;
    regs->ps = 8;
    regs->usp = infop->start_stack;
}

#define ELF_EXEC_PAGESIZE        8192

#endif /* TARGET_ALPHA */

#ifdef TARGET_S390X

#define ELF_CLASS	ELFCLASS64
#define ELF_DATA	ELFDATA2MSB
#define ELF_ARCH	EM_S390

static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
{
    regs->psw.addr = infop->entry;
    regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
                     PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \
                     PSW_MASK_32;
    regs->gprs[15] = infop->start_stack;
}

/* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs).  */
#define ELF_NREG 27
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

enum {
    TARGET_REG_PSWM = 0,
    TARGET_REG_PSWA = 1,
    TARGET_REG_GPRS = 2,
    TARGET_REG_ARS = 18,
    TARGET_REG_ORIG_R2 = 26,
};

static void elf_core_copy_regs(target_elf_gregset_t *regs,
                               const CPUS390XState *env)
{
    int i;
    uint32_t *aregs;

    (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask);
    (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr);
    for (i = 0; i < 16; i++) {
        (*regs)[TARGET_REG_GPRS + i] = tswapreg(env->regs[i]);
    }
    aregs = (uint32_t *)&((*regs)[TARGET_REG_ARS]);
    for (i = 0; i < 16; i++) {
        aregs[i] = tswap32(env->aregs[i]);
    }
    (*regs)[TARGET_REG_ORIG_R2] = 0;
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE 4096

#define VDSO_HEADER "vdso.c.inc"

#endif /* TARGET_S390X */

#ifdef TARGET_RISCV

#define ELF_ARCH  EM_RISCV

#ifdef TARGET_RISCV32
#define ELF_CLASS ELFCLASS32
#define VDSO_HEADER "vdso-32.c.inc"
#else
#define ELF_CLASS ELFCLASS64
#define VDSO_HEADER "vdso-64.c.inc"
#endif

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->sepc = infop->entry;
    regs->sp = infop->start_stack;
}

#define ELF_EXEC_PAGESIZE 4096

#endif /* TARGET_RISCV */

#ifdef TARGET_HPPA

#define ELF_CLASS       ELFCLASS32
#define ELF_ARCH        EM_PARISC
#define STACK_GROWS_DOWN 0
#define STACK_ALIGNMENT  64

#define VDSO_HEADER "vdso.c.inc"

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->iaoq[0] = infop->entry | PRIV_USER;
    regs->iaoq[1] = regs->iaoq[0] + 4;
    regs->gr[23] = 0;
    regs->gr[24] = infop->argv;
    regs->gr[25] = infop->argc;
    /* The top-of-stack contains a linkage buffer.  */
    regs->gr[30] = infop->start_stack + 64;
    regs->gr[31] = infop->entry;
}

#define LO_COMMPAGE  0

static bool init_guest_commpage(void)
{
    /* If reserved_va, then we have already mapped 0 page on the host. */
    if (!reserved_va) {
        void *want, *addr;

        want = g2h_untagged(LO_COMMPAGE);
        addr = mmap(want, TARGET_PAGE_SIZE, PROT_NONE,
                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0);
        if (addr == MAP_FAILED) {
            perror("Allocating guest commpage");
            exit(EXIT_FAILURE);
        }
        if (addr != want) {
            return false;
        }
    }

    /*
     * On Linux, page zero is normally marked execute only + gateway.
     * Normal read or write is supposed to fail (thus PROT_NONE above),
     * but specific offsets have kernel code mapped to raise permissions
     * and implement syscalls.  Here, simply mark the page executable.
     * Special case the entry points during translation (see do_page_zero).
     */
    page_set_flags(LO_COMMPAGE, LO_COMMPAGE | ~TARGET_PAGE_MASK,
                   PAGE_EXEC | PAGE_VALID);
    return true;
}

#endif /* TARGET_HPPA */

#ifdef TARGET_XTENSA

#define ELF_CLASS       ELFCLASS32
#define ELF_ARCH        EM_XTENSA

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->windowbase = 0;
    regs->windowstart = 1;
    regs->areg[1] = infop->start_stack;
    regs->pc = infop->entry;
    if (info_is_fdpic(infop)) {
        regs->areg[4] = infop->loadmap_addr;
        regs->areg[5] = infop->interpreter_loadmap_addr;
        if (infop->interpreter_loadmap_addr) {
            regs->areg[6] = infop->interpreter_pt_dynamic_addr;
        } else {
            regs->areg[6] = infop->pt_dynamic_addr;
        }
    }
}

/* See linux kernel: arch/xtensa/include/asm/elf.h.  */
#define ELF_NREG 128
typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];

enum {
    TARGET_REG_PC,
    TARGET_REG_PS,
    TARGET_REG_LBEG,
    TARGET_REG_LEND,
    TARGET_REG_LCOUNT,
    TARGET_REG_SAR,
    TARGET_REG_WINDOWSTART,
    TARGET_REG_WINDOWBASE,
    TARGET_REG_THREADPTR,
    TARGET_REG_AR0 = 64,
};

static void elf_core_copy_regs(target_elf_gregset_t *regs,
                               const CPUXtensaState *env)
{
    unsigned i;

    (*regs)[TARGET_REG_PC] = tswapreg(env->pc);
    (*regs)[TARGET_REG_PS] = tswapreg(env->sregs[PS] & ~PS_EXCM);
    (*regs)[TARGET_REG_LBEG] = tswapreg(env->sregs[LBEG]);
    (*regs)[TARGET_REG_LEND] = tswapreg(env->sregs[LEND]);
    (*regs)[TARGET_REG_LCOUNT] = tswapreg(env->sregs[LCOUNT]);
    (*regs)[TARGET_REG_SAR] = tswapreg(env->sregs[SAR]);
    (*regs)[TARGET_REG_WINDOWSTART] = tswapreg(env->sregs[WINDOW_START]);
    (*regs)[TARGET_REG_WINDOWBASE] = tswapreg(env->sregs[WINDOW_BASE]);
    (*regs)[TARGET_REG_THREADPTR] = tswapreg(env->uregs[THREADPTR]);
    xtensa_sync_phys_from_window((CPUXtensaState *)env);
    for (i = 0; i < env->config->nareg; ++i) {
        (*regs)[TARGET_REG_AR0 + i] = tswapreg(env->phys_regs[i]);
    }
}

#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE       4096

#endif /* TARGET_XTENSA */

#ifdef TARGET_HEXAGON

#define ELF_CLASS       ELFCLASS32
#define ELF_ARCH        EM_HEXAGON

static inline void init_thread(struct target_pt_regs *regs,
                               struct image_info *infop)
{
    regs->sepc = infop->entry;
    regs->sp = infop->start_stack;
}

#endif /* TARGET_HEXAGON */

#ifndef ELF_MACHINE
#define ELF_MACHINE ELF_ARCH
#endif

#ifndef elf_check_arch
#define elf_check_arch(x) ((x) == ELF_ARCH)
#endif

#ifndef elf_check_abi
#define elf_check_abi(x) (1)
#endif

#ifndef STACK_GROWS_DOWN
#define STACK_GROWS_DOWN 1
#endif

#ifndef STACK_ALIGNMENT
#define STACK_ALIGNMENT 16
#endif

#ifdef TARGET_ABI32
#undef ELF_CLASS
#define ELF_CLASS ELFCLASS32
#undef bswaptls
#define bswaptls(ptr) bswap32s(ptr)
#endif

#ifndef EXSTACK_DEFAULT
#define EXSTACK_DEFAULT false
#endif

/*
 * Provide fallback definitions that the target may omit.
 * One way or another, we'll get a link error if the setting of
 * HAVE_* doesn't match the implementation.
 */
#ifndef HAVE_ELF_HWCAP
abi_ulong get_elf_hwcap(CPUState *cs) { return 0; }
#endif
#ifndef HAVE_ELF_HWCAP2
abi_ulong get_elf_hwcap2(CPUState *cs) { g_assert_not_reached(); }
#define HAVE_ELF_HWCAP2 0
#endif
#ifndef HAVE_ELF_PLATFORM
const char *get_elf_platform(CPUState *cs) { return NULL; }
#endif
#ifndef HAVE_ELF_BASE_PLATFORM
const char *get_elf_base_platform(CPUState *cs) { return NULL; }
#endif

#include "elf.h"

/* We must delay the following stanzas until after "elf.h". */
#if defined(TARGET_AARCH64)

static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz,
                                    const uint32_t *data,
                                    struct image_info *info,
                                    Error **errp)
{
    if (pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
        if (pr_datasz != sizeof(uint32_t)) {
            error_setg(errp, "Ill-formed GNU_PROPERTY_AARCH64_FEATURE_1_AND");
            return false;
        }
        /* We will extract GNU_PROPERTY_AARCH64_FEATURE_1_BTI later. */
        info->note_flags = *data;
    }
    return true;
}
#define ARCH_USE_GNU_PROPERTY 1

#else

static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz,
                                    const uint32_t *data,
                                    struct image_info *info,
                                    Error **errp)
{
    g_assert_not_reached();
}
#define ARCH_USE_GNU_PROPERTY 0

#endif

struct exec
{
    unsigned int a_info;   /* Use macros N_MAGIC, etc for access */
    unsigned int a_text;   /* length of text, in bytes */
    unsigned int a_data;   /* length of data, in bytes */
    unsigned int a_bss;    /* length of uninitialized data area, in bytes */
    unsigned int a_syms;   /* length of symbol table data in file, in bytes */
    unsigned int a_entry;  /* start address */
    unsigned int a_trsize; /* length of relocation info for text, in bytes */
    unsigned int a_drsize; /* length of relocation info for data, in bytes */
};


#define N_MAGIC(exec) ((exec).a_info & 0xffff)
#define OMAGIC 0407
#define NMAGIC 0410
#define ZMAGIC 0413
#define QMAGIC 0314

#define DLINFO_ITEMS 16
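
/*
 * DLINFO_ITEMS must match the number of unconditional NEW_AUX_ENT()
 * calls in create_elf_tables() (AT_PHDR through AT_EXECFN); the assert
 * on info->auxv_len there will fire if the two fall out of sync.
 */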

static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
{
    memcpy(to, from, n);
}

static void bswap_ehdr(struct elfhdr *ehdr)
{
    if (!target_needs_bswap()) {
        return;
    }

    bswap16s(&ehdr->e_type);            /* Object file type */
    bswap16s(&ehdr->e_machine);         /* Architecture */
    bswap32s(&ehdr->e_version);         /* Object file version */
    bswaptls(&ehdr->e_entry);           /* Entry point virtual address */
    bswaptls(&ehdr->e_phoff);           /* Program header table file offset */
    bswaptls(&ehdr->e_shoff);           /* Section header table file offset */
    bswap32s(&ehdr->e_flags);           /* Processor-specific flags */
    bswap16s(&ehdr->e_ehsize);          /* ELF header size in bytes */
    bswap16s(&ehdr->e_phentsize);       /* Program header table entry size */
    bswap16s(&ehdr->e_phnum);           /* Program header table entry count */
    bswap16s(&ehdr->e_shentsize);       /* Section header table entry size */
    bswap16s(&ehdr->e_shnum);           /* Section header table entry count */
    bswap16s(&ehdr->e_shstrndx);        /* Section header string table index */
}

static void bswap_phdr(struct elf_phdr *phdr, int phnum)
{
    if (!target_needs_bswap()) {
        return;
    }

    for (int i = 0; i < phnum; ++i, ++phdr) {
        bswap32s(&phdr->p_type);        /* Segment type */
        bswap32s(&phdr->p_flags);       /* Segment flags */
        bswaptls(&phdr->p_offset);      /* Segment file offset */
        bswaptls(&phdr->p_vaddr);       /* Segment virtual address */
        bswaptls(&phdr->p_paddr);       /* Segment physical address */
        bswaptls(&phdr->p_filesz);      /* Segment size in file */
        bswaptls(&phdr->p_memsz);       /* Segment size in memory */
        bswaptls(&phdr->p_align);       /* Segment alignment */
    }
}

static void bswap_shdr(struct elf_shdr *shdr, int shnum)
{
    if (!target_needs_bswap()) {
        return;
    }

    for (int i = 0; i < shnum; ++i, ++shdr) {
        bswap32s(&shdr->sh_name);
        bswap32s(&shdr->sh_type);
        bswaptls(&shdr->sh_flags);
        bswaptls(&shdr->sh_addr);
        bswaptls(&shdr->sh_offset);
        bswaptls(&shdr->sh_size);
        bswap32s(&shdr->sh_link);
        bswap32s(&shdr->sh_info);
        bswaptls(&shdr->sh_addralign);
        bswaptls(&shdr->sh_entsize);
    }
}

static void bswap_sym(struct elf_sym *sym)
{
    if (!target_needs_bswap()) {
        return;
    }

    bswap32s(&sym->st_name);
    bswaptls(&sym->st_value);
    bswaptls(&sym->st_size);
    bswap16s(&sym->st_shndx);
}

#ifdef TARGET_MIPS
static void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags)
{
    if (!target_needs_bswap()) {
        return;
    }

    bswap16s(&abiflags->version);
    bswap32s(&abiflags->ases);
    bswap32s(&abiflags->isa_ext);
    bswap32s(&abiflags->flags1);
    bswap32s(&abiflags->flags2);
}
#endif

#ifdef USE_ELF_CORE_DUMP
static int elf_core_dump(int, const CPUArchState *);
#endif /* USE_ELF_CORE_DUMP */
static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
                         abi_ulong load_bias);

/* Verify the portions of EHDR within E_IDENT for the target.
   This can be performed before bswapping the entire header.  */
static bool elf_check_ident(struct elfhdr *ehdr)
{
    return (ehdr->e_ident[EI_MAG0] == ELFMAG0
            && ehdr->e_ident[EI_MAG1] == ELFMAG1
            && ehdr->e_ident[EI_MAG2] == ELFMAG2
            && ehdr->e_ident[EI_MAG3] == ELFMAG3
            && ehdr->e_ident[EI_CLASS] == ELF_CLASS
            && ehdr->e_ident[EI_DATA] == ELF_DATA
            && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
}

/* Verify the portions of EHDR outside of E_IDENT for the target.
   This has to wait until after bswapping the header.  */
static bool elf_check_ehdr(struct elfhdr *ehdr)
{
    return (elf_check_arch(ehdr->e_machine)
            && elf_check_abi(ehdr->e_flags)
            && ehdr->e_ehsize == sizeof(struct elfhdr)
            && ehdr->e_phentsize == sizeof(struct elf_phdr)
            && (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN));
}

/*
 * 'copy_elf_strings()' copies argument/environment strings from user
 * memory to free pages in kernel mem.  These are in a format ready
 * to be put directly into the top of new user memory.
 */
static abi_ulong copy_elf_strings(int argc, char **argv, char *scratch,
                                  abi_ulong p, abi_ulong stack_limit)
{
    char *tmp;
    int len, i;
    abi_ulong top = p;

    if (!p) {
        return 0;       /* bullet-proofing */
    }

    if (STACK_GROWS_DOWN) {
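        /*
         * Descriptive note: strings are assembled a page at a time in the
         * host scratch buffer, last string first.  "offset" mirrors p's
         * offset within its guest page; whenever it reaches zero, a full
         * page [p, top) is flushed to the guest with memcpy_to_target()
         * and the window restarts at the next lower page.
         */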
        int offset = ((p - 1) % TARGET_PAGE_SIZE) + 1;
        for (i = argc - 1; i >= 0; --i) {
            tmp = argv[i];
            if (!tmp) {
                fprintf(stderr, "VFS: argc is wrong");
                exit(-1);
            }
            len = strlen(tmp) + 1;
            tmp += len;

            if (len > (p - stack_limit)) {
                return 0;
            }
            while (len) {
                int bytes_to_copy = (len > offset) ? offset : len;
                tmp -= bytes_to_copy;
                p -= bytes_to_copy;
                offset -= bytes_to_copy;
                len -= bytes_to_copy;

                memcpy_fromfs(scratch + offset, tmp, bytes_to_copy);

                if (offset == 0) {
                    memcpy_to_target(p, scratch, top - p);
                    top = p;
                    offset = TARGET_PAGE_SIZE;
                }
            }
        }
        if (p != top) {
            memcpy_to_target(p, scratch + offset, top - p);
        }
    } else {
        int remaining = TARGET_PAGE_SIZE - (p % TARGET_PAGE_SIZE);
        for (i = 0; i < argc; ++i) {
            tmp = argv[i];
            if (!tmp) {
                fprintf(stderr, "VFS: argc is wrong");
                exit(-1);
            }
            len = strlen(tmp) + 1;
            if (len > (stack_limit - p)) {
                return 0;
            }
            while (len) {
                int bytes_to_copy = (len > remaining) ? remaining : len;

                memcpy_fromfs(scratch + (p - top), tmp, bytes_to_copy);

                tmp += bytes_to_copy;
                remaining -= bytes_to_copy;
                p += bytes_to_copy;
                len -= bytes_to_copy;

                if (remaining == 0) {
                    memcpy_to_target(top, scratch, p - top);
                    top = p;
                    remaining = TARGET_PAGE_SIZE;
                }
            }
        }
        if (p != top) {
            memcpy_to_target(top, scratch, p - top);
        }
    }

    return p;
}

/* Older linux kernels provide up to MAX_ARG_PAGES (default: 32) of
 * argument/environment space. Newer kernels (>2.6.33) allow more,
 * dependent on stack size, but guarantee at least 32 pages for
 * backwards compatibility.
 */
#define STACK_LOWER_LIMIT (32 * TARGET_PAGE_SIZE)

static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
                                 struct image_info *info)
{
    abi_ulong size, error, guard;
    int prot;

    size = guest_stack_size;
    if (size < STACK_LOWER_LIMIT) {
        size = STACK_LOWER_LIMIT;
    }

    if (STACK_GROWS_DOWN) {
        guard = TARGET_PAGE_SIZE;
        if (guard < qemu_real_host_page_size()) {
            guard = qemu_real_host_page_size();
        }
    } else {
        /* no guard page for hppa target where stack grows upwards. */
        guard = 0;
    }

    prot = PROT_READ | PROT_WRITE;
    if (info->exec_stack) {
        prot |= PROT_EXEC;
    }
    error = target_mmap(0, size + guard, prot,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (error == -1) {
        perror("mmap stack");
        exit(-1);
    }

    /*
     * Reserve one extra guard page at the low end of the mapping; for a
     * downward-growing stack this is the overflow limit, not the
     * initial stack top.
     */
    if (STACK_GROWS_DOWN) {
        target_mprotect(error, guard, PROT_NONE);
        info->stack_limit = error + guard;
        return info->stack_limit + size - sizeof(void *);
    } else {
        info->stack_limit = error + size;
        return error;
    }
}

/**
 * zero_bss:
 *
 * Map and zero the bss.  We need to explicitly zero any fractional pages
 * after the data section (i.e. bss).  Return false on mapping failure.
 */
static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss,
                     int prot, Error **errp)
{
    abi_ulong align_bss;

    /* We only expect writable bss; the code segment shouldn't need this. */
    if (!(prot & PROT_WRITE)) {
        error_setg(errp, "PT_LOAD with non-writable bss");
        return false;
    }

    align_bss = TARGET_PAGE_ALIGN(start_bss);
    end_bss = TARGET_PAGE_ALIGN(end_bss);

    if (start_bss < align_bss) {
        int flags = page_get_flags(start_bss);

        if (!(flags & PAGE_RWX)) {
            /*
             * The whole address space of the executable was reserved
             * at the start, therefore all pages will be VALID.
             * But assuming there are no PROT_NONE PT_LOAD segments,
             * a PROT_NONE page means no data, all bss, and we can
             * simply extend the new anon mapping back to the start
             * of the page of bss.
             */
            align_bss -= TARGET_PAGE_SIZE;
        } else {
            /*
             * The start of the bss shares a page with something.
             * The only thing that we expect is the data section,
             * which would already be marked writable.
             * Overlapping the RX code segment seems malformed.
             */
            if (!(flags & PAGE_WRITE)) {
                error_setg(errp, "PT_LOAD with bss overlapping "
                           "non-writable page");
                return false;
            }

            /* The page is already mapped and writable. */
            memset(g2h_untagged(start_bss), 0, align_bss - start_bss);
        }
    }

    if (align_bss < end_bss &&
        target_mmap(align_bss, end_bss - align_bss, prot,
                    MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) {
        error_setg_errno(errp, errno, "Error mapping bss");
        return false;
    }
    return true;
}
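
/*
 * Worked example (illustrative, 4 KiB pages): a PT_LOAD whose file data
 * ends at 0x11234 but whose memory image ends at 0x13000 gives
 * start_bss = 0x11234 and end_bss = 0x13000.  The tail of the data page,
 * [0x11234, 0x12000), is cleared with the memset() above, and
 * [0x12000, 0x13000) is mapped as fresh anonymous (zero-filled) memory.
 */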

#if defined(TARGET_ARM)
static int elf_is_fdpic(struct elfhdr *exec)
{
    return exec->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC;
}
#elif defined(TARGET_XTENSA)
static int elf_is_fdpic(struct elfhdr *exec)
{
    return exec->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC;
}
#else
/* Default implementation, always false.  */
static int elf_is_fdpic(struct elfhdr *exec)
{
    return 0;
}
#endif

static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong sp)
{
    uint16_t n;
    struct elf32_fdpic_loadseg *loadsegs = info->loadsegs;

    /* elf32_fdpic_loadseg */
    n = info->nsegs;
    while (n--) {
        sp -= 12;
        put_user_u32(loadsegs[n].addr, sp+0);
        put_user_u32(loadsegs[n].p_vaddr, sp+4);
        put_user_u32(loadsegs[n].p_memsz, sp+8);
    }

    /* elf32_fdpic_loadmap */
    sp -= 4;
    put_user_u16(0, sp+0); /* version */
    put_user_u16(info->nsegs, sp+2); /* nsegs */

    info->personality = PER_LINUX_FDPIC;
    info->loadmap_addr = sp;

    return sp;
}
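
/*
 * The resulting guest stack layout, from low to high addresses:
 *
 *   loadmap_addr -> u16 version (0)
 *                   u16 nsegs
 *                   nsegs * { u32 addr; u32 p_vaddr; u32 p_memsz; }
 *
 * i.e. an elf32_fdpic_loadmap immediately followed by its loadseg array.
 */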

static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
                                   struct elfhdr *exec,
                                   struct image_info *info,
                                   struct image_info *interp_info,
                                   struct image_info *vdso_info)
{
    abi_ulong sp;
    abi_ulong u_argc, u_argv, u_envp, u_auxv;
    int size;
    int i;
    abi_ulong u_rand_bytes;
    uint8_t k_rand_bytes[16];
    abi_ulong u_platform, u_base_platform;
    const char *k_platform, *k_base_platform;
    const int n = sizeof(elf_addr_t);

    sp = p;

    /* Needs to be before we load the env/argc/... */
    if (elf_is_fdpic(exec)) {
        /* Need 4 byte alignment for these structs */
        sp &= ~3;
        sp = loader_build_fdpic_loadmap(info, sp);
        info->other_info = interp_info;
        if (interp_info) {
            interp_info->other_info = info;
            sp = loader_build_fdpic_loadmap(interp_info, sp);
            info->interpreter_loadmap_addr = interp_info->loadmap_addr;
            info->interpreter_pt_dynamic_addr = interp_info->pt_dynamic_addr;
        } else {
            info->interpreter_loadmap_addr = 0;
            info->interpreter_pt_dynamic_addr = 0;
        }
    }

    u_base_platform = 0;
    k_base_platform = get_elf_base_platform(thread_cpu);
    if (k_base_platform) {
        size_t len = strlen(k_base_platform) + 1;
        if (STACK_GROWS_DOWN) {
            sp -= (len + n - 1) & ~(n - 1);
            u_base_platform = sp;
            /* FIXME - check return value of memcpy_to_target() for failure */
            memcpy_to_target(sp, k_base_platform, len);
        } else {
            memcpy_to_target(sp, k_base_platform, len);
            u_base_platform = sp;
            sp += len + 1;
        }
    }

    u_platform = 0;
    k_platform = get_elf_platform(thread_cpu);
    if (k_platform) {
        size_t len = strlen(k_platform) + 1;
        if (STACK_GROWS_DOWN) {
            sp -= (len + n - 1) & ~(n - 1);
            u_platform = sp;
            /* FIXME - check return value of memcpy_to_target() for failure */
            memcpy_to_target(sp, k_platform, len);
        } else {
            memcpy_to_target(sp, k_platform, len);
            u_platform = sp;
            sp += len + 1;
        }
    }

    /* Provide 16 byte alignment for the PRNG, and basic alignment for
     * the argv and envp pointers.
     */
    if (STACK_GROWS_DOWN) {
        sp = QEMU_ALIGN_DOWN(sp, 16);
    } else {
        sp = QEMU_ALIGN_UP(sp, 16);
    }

    /*
     * Generate 16 random bytes for userspace PRNG seeding.
     */
    qemu_guest_getrandom_nofail(k_rand_bytes, sizeof(k_rand_bytes));
    if (STACK_GROWS_DOWN) {
        sp -= 16;
        u_rand_bytes = sp;
        /* FIXME - check return value of memcpy_to_target() for failure */
        memcpy_to_target(sp, k_rand_bytes, 16);
    } else {
        memcpy_to_target(sp, k_rand_bytes, 16);
        u_rand_bytes = sp;
        sp += 16;
    }

    size = (DLINFO_ITEMS + 1) * 2;
    if (k_base_platform) {
        size += 2;
    }
    if (k_platform) {
        size += 2;
    }
    if (vdso_info) {
        size += 2;
    }
#ifdef DLINFO_ARCH_ITEMS
    size += DLINFO_ARCH_ITEMS * 2;
#endif
    if (HAVE_ELF_HWCAP2) {
        size += 2;
    }
    info->auxv_len = size * n;

    size += envc + argc + 2;
    size += 1;  /* argc itself */
    size *= n;

    /* Allocate space and finalize stack alignment for entry now.  */
    if (STACK_GROWS_DOWN) {
        u_argc = QEMU_ALIGN_DOWN(sp - size, STACK_ALIGNMENT);
        sp = u_argc;
    } else {
        u_argc = sp;
        sp = QEMU_ALIGN_UP(sp + size, STACK_ALIGNMENT);
    }

    u_argv = u_argc + n;
    u_envp = u_argv + (argc + 1) * n;
    u_auxv = u_envp + (envc + 1) * n;
    info->saved_auxv = u_auxv;
    info->argc = argc;
    info->envc = envc;
    info->argv = u_argv;
    info->envp = u_envp;

    /* This is correct because Linux defines
     * elf_addr_t as Elf32_Off / Elf64_Off
     */
#define NEW_AUX_ENT(id, val) do {               \
        put_user_ual(id, u_auxv);  u_auxv += n; \
        put_user_ual(val, u_auxv); u_auxv += n; \
    } while(0)

#ifdef ARCH_DLINFO
    /*
     * ARCH_DLINFO must come first so platform specific code can enforce
     * special alignment requirements on the AUXV if necessary (eg. PPC).
     */
    ARCH_DLINFO;
#endif
    /* There must be exactly DLINFO_ITEMS entries here, or the assert
     * on info->auxv_len will trigger.
     */
    NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff));
    NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr)));
    NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum));
    NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE));
    NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? interp_info->load_addr : 0));
    NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0);
    NEW_AUX_ENT(AT_ENTRY, info->entry);
    NEW_AUX_ENT(AT_UID, (abi_ulong) getuid());
    NEW_AUX_ENT(AT_EUID, (abi_ulong) geteuid());
    NEW_AUX_ENT(AT_GID, (abi_ulong) getgid());
    NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid());
    NEW_AUX_ENT(AT_HWCAP, get_elf_hwcap(thread_cpu));
    NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK));
    NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes);
    NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE));
    NEW_AUX_ENT(AT_EXECFN, info->file_string);

    if (HAVE_ELF_HWCAP2) {
        NEW_AUX_ENT(AT_HWCAP2, get_elf_hwcap2(thread_cpu));
    }
    if (u_base_platform) {
        NEW_AUX_ENT(AT_BASE_PLATFORM, u_base_platform);
    }
    if (u_platform) {
        NEW_AUX_ENT(AT_PLATFORM, u_platform);
    }
    if (vdso_info) {
        NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
    }
    NEW_AUX_ENT (AT_NULL, 0);
#undef NEW_AUX_ENT

    /* Check that our initial calculation of the auxv length matches how much
     * we actually put into it.
     */
    assert(info->auxv_len == u_auxv - info->saved_auxv);

    put_user_ual(argc, u_argc);

    p = info->arg_strings;
    for (i = 0; i < argc; ++i) {
        put_user_ual(p, u_argv);
        u_argv += n;
        p += target_strlen(p) + 1;
    }
    put_user_ual(0, u_argv);

    p = info->env_strings;
    for (i = 0; i < envc; ++i) {
        put_user_ual(p, u_envp);
        u_envp += n;
        p += target_strlen(p) + 1;
    }
    put_user_ual(0, u_envp);

    return sp;
}
1704 
1705 #if defined(HI_COMMPAGE)
1706 #define LO_COMMPAGE -1
1707 #elif defined(LO_COMMPAGE)
1708 #define HI_COMMPAGE 0
1709 #else
1710 #define HI_COMMPAGE 0
1711 #define LO_COMMPAGE -1
1712 #ifndef INIT_GUEST_COMMPAGE
1713 #define init_guest_commpage() true
1714 #endif
1715 #endif
1716 
1717 /**
1718  * pgb_try_mmap:
1719  * @addr: host start address
1720  * @addr_last: host last address
1721  * @keep: do not unmap the probe region
1722  *
1723  * Return 1 if [@addr, @addr_last] is not mapped in the host,
1724  * return 0 if it is not available to map, and -1 on mmap error.
1725  * If @keep, the region is left mapped on success, otherwise unmapped.
1726  */
1727 static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep)
1728 {
1729     size_t size = addr_last - addr + 1;
1730     void *p = mmap((void *)addr, size, PROT_NONE,
1731                    MAP_ANONYMOUS | MAP_PRIVATE |
1732                    MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
1733     int ret;
1734 
1735     if (p == MAP_FAILED) {
1736         return errno == EEXIST ? 0 : -1;
1737     }
1738     ret = p == (void *)addr;
1739     if (!keep || !ret) {
1740         munmap(p, size);
1741     }
1742     return ret;
1743 }
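
/*
 * A minimal sketch of the same probe outside QEMU, assuming a Linux host
 * new enough (4.17+) to know MAP_FIXED_NOREPLACE; the kernel refuses to
 * clobber an existing mapping and fails with EEXIST, which is what makes
 * the probe safe:
 *
 *     void *p = mmap((void *)addr, size, PROT_NONE,
 *                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE,
 *                    -1, 0);
 *     if (p == MAP_FAILED) {
 *         // errno == EEXIST: range already in use
 *     }
 *
 * Hosts that predate the flag simply ignore it and return some other
 * address, which is why pgb_try_mmap also checks p == (void *)addr.
 */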
1744 
1745 /**
1746  * pgb_try_mmap_skip_brk:
1747  * @addr: host start address
1748  * @addr_last: host last address
1749  * @brk: host brk
1750  * @keep: do not unmap the probe region
1751  * Like pgb_try_mmap, but decline any range intersecting the 16 MiB above brk.
1752  */
1753 static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last,
1754                                  uintptr_t brk, bool keep)
1755 {
1756     uintptr_t brk_last = brk + 16 * MiB - 1;
1757 
1758     /* Do not map anything close to the host brk. */
1759     if (addr <= brk_last && brk <= addr_last) {
1760         return 0;
1761     }
1762     return pgb_try_mmap(addr, addr_last, keep);
1763 }
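
/*
 * Worked example (hypothetical): with the host brk at 0x555556000000,
 * any probe range intersecting [0x555556000000, 0x555556ffffff] is
 * rejected outright, leaving the host allocator 16 MiB of headroom.
 */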
1764 
1765 /**
1766  * pgb_try_mmap_set:
1767  * @ga: set of guest addrs
1768  * @base: guest_base
1769  * @brk: host brk
1770  *
1771  * Return true if all @ga can be mapped by the host at @base.
1772  * On success, retain the mapping at index 0 for reserved_va.
1773  */
1774 
1775 typedef struct PGBAddrs {
1776     uintptr_t bounds[3][2]; /* start/last pairs */
1777     int nbounds;
1778 } PGBAddrs;
1779 
1780 static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
1781 {
1782     for (int i = ga->nbounds - 1; i >= 0; --i) {
1783         if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
1784                                   ga->bounds[i][1] + base,
1785                                   brk, i == 0 && reserved_va) <= 0) {
1786             return false;
1787         }
1788     }
1789     return true;
1790 }
1791 
1792 /**
1793  * pgb_addr_set:
1794  * @ga: output set of guest addrs
1795  * @guest_loaddr: guest image low address
1796  * @guest_hiaddr: guest image high address
1797  * @try_identity: create for identity mapping
1798  *
1799  * Fill in @ga with the image, COMMPAGE and NULL page.
1800  */
1801 static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
1802                          abi_ulong guest_hiaddr, bool try_identity)
1803 {
1804     int n;
1805 
1806     /*
1807      * With a low commpage, or a guest mapped very low,
1808      * we may not be able to use the identity map.
1809      */
1810     if (try_identity) {
1811         if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
1812             return false;
1813         }
1814         if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
1815             return false;
1816         }
1817     }
1818 
1819     memset(ga, 0, sizeof(*ga));
1820     n = 0;
1821 
1822     if (reserved_va) {
1823         ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
1824         ga->bounds[n][1] = reserved_va;
1825         n++;
1826         /* LO_COMMPAGE and NULL handled by reserving from 0. */
1827     } else {
1828         /* Add any LO_COMMPAGE or NULL page. */
1829         if (LO_COMMPAGE != -1) {
1830             ga->bounds[n][0] = 0;
1831             ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1;
1832             n++;
1833         } else if (!try_identity) {
1834             ga->bounds[n][0] = 0;
1835             ga->bounds[n][1] = TARGET_PAGE_SIZE - 1;
1836             n++;
1837         }
1838 
1839         /* Add the guest image for ET_EXEC. */
1840         if (guest_loaddr) {
1841             ga->bounds[n][0] = guest_loaddr;
1842             ga->bounds[n][1] = guest_hiaddr;
1843             n++;
1844         }
1845     }
1846 
1847     /*
1848      * Temporarily disable
1849      *   "comparison is always false due to limited range of data type"
1850      * due to comparing an unsigned value against HI_COMMPAGE, which may be 0.
1851      */
1852 #pragma GCC diagnostic push
1853 #pragma GCC diagnostic ignored "-Wtype-limits"
1854 
1855     /* Add any HI_COMMPAGE not covered by reserved_va. */
1856     if (reserved_va < HI_COMMPAGE) {
1857         ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask();
1858         ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1;
1859         n++;
1860     }
1861 
1862 #pragma GCC diagnostic pop
1863 
1864     ga->nbounds = n;
1865     return true;
1866 }
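
/*
 * Worked example (hypothetical values): for an ET_EXEC image linked at
 * [0x400000, 0x60ffff], with no reserved_va, no LO_COMMPAGE, and
 * try_identity false, the resulting set is:
 *
 *     bounds[0] = { 0,        TARGET_PAGE_SIZE - 1 }   the NULL page
 *     bounds[1] = { 0x400000, 0x60ffff }               the image
 *
 * plus a third entry for HI_COMMPAGE on targets that define one.
 */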
1867 
1868 static void pgb_fail_in_use(const char *image_name)
1869 {
1870     error_report("%s: requires virtual address space that is in use "
1871                  "(omit the -B option or choose a different value)",
1872                  image_name);
1873     exit(EXIT_FAILURE);
1874 }
1875 
1876 static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr,
1877                       uintptr_t guest_hiaddr, uintptr_t align)
1878 {
1879     PGBAddrs ga;
1880     uintptr_t brk = (uintptr_t)sbrk(0);
1881 
1882     if (!QEMU_IS_ALIGNED(guest_base, align)) {
1883         fprintf(stderr, "Requested guest base %p does not satisfy "
1884                 "host minimum alignment (0x%" PRIxPTR ")\n",
1885                 (void *)guest_base, align);
1886         exit(EXIT_FAILURE);
1887     }
1888 
1889     if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base)
1890         || !pgb_try_mmap_set(&ga, guest_base, brk)) {
1891         pgb_fail_in_use(image_name);
1892     }
1893 }
1894 
1895 /**
1896  * pgb_find_fallback:
1897  *
1898  * This is a fallback method for finding holes in the host address space
1899  * if we don't have the benefit of being able to access /proc/self/maps.
1900  * It can potentially take a very long time, as we can only dumbly iterate
1901  * up the host address space, testing whether each allocation would work.
1902  */
1903 static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align,
1904                                    uintptr_t brk)
1905 {
1906     /* TODO: come up with a better estimate of how much to skip. */
1907     uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB;
1908 
1909     for (uintptr_t base = skip; ; base += skip) {
1910         base = ROUND_UP(base, align);
1911         if (pgb_try_mmap_set(ga, base, brk)) {
1912             return base;
1913         }
1914         if (base >= -skip) {
1915             return -1;
1916         }
1917     }
1918 }
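
/*
 * The "base >= -skip" test above is the wraparound guard: interpreted as
 * a uintptr_t, -skip is the first base at which base + skip would wrap
 * past zero, so reaching it means the host address space is exhausted.
 * On a 64-bit host with skip == 1 GiB that limit is 0xffffffffc0000000.
 */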
1919 
1920 static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base,
1921                                IntervalTreeRoot *root)
1922 {
1923     for (int i = ga->nbounds - 1; i >= 0; --i) {
1924         uintptr_t s = base + ga->bounds[i][0];
1925         uintptr_t l = base + ga->bounds[i][1];
1926         IntervalTreeNode *n;
1927 
1928         if (l < s) {
1929             /* Wraparound. Skip to advance S to mmap_min_addr. */
1930             return mmap_min_addr - s;
1931         }
1932 
1933         n = interval_tree_iter_first(root, s, l);
1934         if (n != NULL) {
1935             /* Conflict.  Skip to advance S to LAST + 1. */
1936             return n->last - s + 1;
1937         }
1938     }
1939     return 0;  /* success */
1940 }
1941 
1942 static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root,
1943                                 uintptr_t align, uintptr_t brk)
1944 {
1945     uintptr_t last = sizeof(uintptr_t) == 4 ? MiB : GiB;
1946     uintptr_t base, skip;
1947 
1948     while (true) {
1949         base = ROUND_UP(last, align);
1950         if (base < last) {
1951             return -1;
1952         }
1953 
1954         skip = pgb_try_itree(ga, base, root);
1955         if (skip == 0) {
1956             break;
1957         }
1958 
1959         last = base + skip;
1960         if (last < base) {
1961             return -1;
1962         }
1963     }
1964 
1965     /*
1966      * We've chosen 'base' based on holes in the interval tree,
1967      * but we don't yet know if it is a valid host address.
1968      * Because it is the first matching hole, if the host addresses
1969      * are invalid we know there are no further matches.
1970      */
1971     return pgb_try_mmap_set(ga, base, brk) ? base : -1;
1972 }
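
/*
 * Sketch of one search step (hypothetical numbers): probing bounds
 * {0, 0xffff} at base 0x20000 against an existing host mapping
 * [0x20000, 0x2ffff] conflicts, so pgb_try_itree returns
 * n->last - s + 1 == 0x10000 and the next candidate base becomes
 * ROUND_UP(0x30000, align).
 */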
1973 
1974 static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr,
1975                         uintptr_t guest_hiaddr, uintptr_t align)
1976 {
1977     IntervalTreeRoot *root;
1978     uintptr_t brk, ret;
1979     PGBAddrs ga;
1980 
1981     /* Try the identity map first. */
1982     if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) {
1983         brk = (uintptr_t)sbrk(0);
1984         if (pgb_try_mmap_set(&ga, 0, brk)) {
1985             guest_base = 0;
1986             return;
1987         }
1988     }
1989 
1990     /*
1991      * Rebuild the address set for non-identity map.
1992      * This differs in the mapping of the guest NULL page.
1993      */
1994     pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false);
1995 
1996     root = read_self_maps();
1997 
1998     /* Read brk after we've read the maps, which will malloc. */
1999     brk = (uintptr_t)sbrk(0);
2000 
2001     if (!root) {
2002         ret = pgb_find_fallback(&ga, align, brk);
2003     } else {
2004         /*
2005          * Reserve the area close to the host brk.
2006          * This will be freed with the rest of the tree.
2007          */
2008         IntervalTreeNode *b = g_new0(IntervalTreeNode, 1);
2009         b->start = brk;
2010         b->last = brk + 16 * MiB - 1;
2011         interval_tree_insert(b, root);
2012 
2013         ret = pgb_find_itree(&ga, root, align, brk);
2014         free_self_maps(root);
2015     }
2016 
2017     if (ret == -1) {
2018         int w = TARGET_LONG_BITS / 4;
2019 
2020         error_report("%s: Unable to find a guest_base to satisfy all "
2021                      "guest address mapping requirements", image_name);
2022 
2023         for (int i = 0; i < ga.nbounds; ++i) {
2024             error_printf("  %0*" PRIx64 "-%0*" PRIx64 "\n",
2025                          w, (uint64_t)ga.bounds[i][0],
2026                          w, (uint64_t)ga.bounds[i][1]);
2027         }
2028         exit(EXIT_FAILURE);
2029     }
2030     guest_base = ret;
2031 }
2032 
2033 void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
2034                       abi_ulong guest_hiaddr)
2035 {
2036     /* In order to use host shmat, we must be able to honor SHMLBA.  */
2037     uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE);
2038 
2039     /* Sanity check the guest binary. */
2040     if (reserved_va) {
2041         if (guest_hiaddr > reserved_va) {
2042             error_report("%s: requires more than reserved virtual "
2043                          "address space (0x%" PRIx64 " > 0x%lx)",
2044                          image_name, (uint64_t)guest_hiaddr, reserved_va);
2045             exit(EXIT_FAILURE);
2046         }
2047     } else {
2048         if (guest_hiaddr != (uintptr_t)guest_hiaddr) {
2049             error_report("%s: requires more virtual address space "
2050                          "than the host can provide (0x%" PRIx64 ")",
2051                          image_name, (uint64_t)guest_hiaddr + 1);
2052             exit(EXIT_FAILURE);
2053         }
2054     }
2055 
2056     if (have_guest_base) {
2057         pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align);
2058     } else {
2059         pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align);
2060     }
2061 
2062     /* Reserve and initialize the commpage. */
2063     if (!init_guest_commpage()) {
2064         /* We have already probed for the commpage being free. */
2065         g_assert_not_reached();
2066     }
2067 
2068     assert(QEMU_IS_ALIGNED(guest_base, align));
2069     qemu_log_mask(CPU_LOG_PAGE, "Locating guest address space "
2070                   "@ 0x%" PRIx64 "\n", (uint64_t)guest_base);
2071 }
2072 
2073 enum {
2074     /* The string "GNU\0" as a magic number. */
2075     GNU0_MAGIC = const_le32('G' | 'N' << 8 | 'U' << 16),
2076     NOTE_DATA_SZ = 1 * KiB,
2077     NOTE_NAME_SZ = 4,
2078     ELF_GNU_PROPERTY_ALIGN = ELF_CLASS == ELFCLASS32 ? 4 : 8,
2079 };
2080 
2081 /*
2082  * Process a single gnu_property entry.
2083  * Return false for error.
2084  */
2085 static bool parse_elf_property(const uint32_t *data, int *off, int datasz,
2086                                struct image_info *info, bool have_prev_type,
2087                                uint32_t *prev_type, Error **errp)
2088 {
2089     uint32_t pr_type, pr_datasz, step;
2090 
2091     if (*off > datasz || !QEMU_IS_ALIGNED(*off, ELF_GNU_PROPERTY_ALIGN)) {
2092         goto error_data;
2093     }
2094     datasz -= *off;
2095     data += *off / sizeof(uint32_t);
2096 
2097     if (datasz < 2 * sizeof(uint32_t)) {
2098         goto error_data;
2099     }
2100     pr_type = data[0];
2101     pr_datasz = data[1];
2102     data += 2;
2103     datasz -= 2 * sizeof(uint32_t);
2104     step = ROUND_UP(pr_datasz, ELF_GNU_PROPERTY_ALIGN);
2105     if (step > datasz) {
2106         goto error_data;
2107     }
2108 
2109     /* Properties are supposed to be unique and sorted on pr_type. */
2110     if (have_prev_type && pr_type <= *prev_type) {
2111         if (pr_type == *prev_type) {
2112             error_setg(errp, "Duplicate property in PT_GNU_PROPERTY");
2113         } else {
2114             error_setg(errp, "Unsorted property in PT_GNU_PROPERTY");
2115         }
2116         return false;
2117     }
2118     *prev_type = pr_type;
2119 
2120     if (!arch_parse_elf_property(pr_type, pr_datasz, data, info, errp)) {
2121         return false;
2122     }
2123 
2124     *off += 2 * sizeof(uint32_t) + step;
2125     return true;
2126 
2127  error_data:
2128     error_setg(errp, "Ill-formed property in PT_GNU_PROPERTY");
2129     return false;
2130 }
2131 
2132 /* Process NT_GNU_PROPERTY_TYPE_0. */
2133 static bool parse_elf_properties(const ImageSource *src,
2134                                  struct image_info *info,
2135                                  const struct elf_phdr *phdr,
2136                                  Error **errp)
2137 {
2138     union {
2139         struct elf_note nhdr;
2140         uint32_t data[NOTE_DATA_SZ / sizeof(uint32_t)];
2141     } note;
2142 
2143     int n, off, datasz;
2144     bool have_prev_type;
2145     uint32_t prev_type;
2146 
2147     /* Unless the arch requires properties, ignore them. */
2148     if (!ARCH_USE_GNU_PROPERTY) {
2149         return true;
2150     }
2151 
2152     /* If the properties are too large for our buffer, give up. */
2153     n = phdr->p_filesz;
2154     if (n > sizeof(note)) {
2155         error_setg(errp, "PT_GNU_PROPERTY too large");
2156         return false;
2157     }
2158     if (n < sizeof(note.nhdr)) {
2159         error_setg(errp, "PT_GNU_PROPERTY too small");
2160         return false;
2161     }
2162 
2163     if (!imgsrc_read(&note, phdr->p_offset, n, src, errp)) {
2164         return false;
2165     }
2166 
2167     /*
2168      * The contents of a valid PT_GNU_PROPERTY is a sequence of uint32_t.
2169      * Swap most of them now, beyond the header and namesz.
2170      */
2171     if (target_needs_bswap()) {
2172         for (int i = 4; i < n / 4; i++) {
2173             bswap32s(note.data + i);
2174         }
2175     }
2176 
2177     /*
2178      * Note that nhdr is 3 words, and that the "name" described by namesz
2179      * immediately follows nhdr and is thus at the 4th word.  Further, all
2180      * of the inputs to the kernel's round_up are multiples of 4.
2181      */
2182     if (tswap32(note.nhdr.n_type) != NT_GNU_PROPERTY_TYPE_0 ||
2183         tswap32(note.nhdr.n_namesz) != NOTE_NAME_SZ ||
2184         note.data[3] != GNU0_MAGIC) {
2185         error_setg(errp, "Invalid note in PT_GNU_PROPERTY");
2186         return false;
2187     }
2188     off = sizeof(note.nhdr) + NOTE_NAME_SZ;
2189 
2190     datasz = tswap32(note.nhdr.n_descsz) + off;
2191     if (datasz > n) {
2192         error_setg(errp, "Invalid note size in PT_GNU_PROPERTY");
2193         return false;
2194     }
2195 
2196     have_prev_type = false;
2197     prev_type = 0;
2198     while (1) {
2199         if (off == datasz) {
2200             return true;  /* end, exit ok */
2201         }
2202         if (!parse_elf_property(note.data, &off, datasz, info,
2203                                 have_prev_type, &prev_type, errp)) {
2204             return false;
2205         }
2206         have_prev_type = true;
2207     }
2208 }
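
/*
 * For reference, a minimal NT_GNU_PROPERTY_TYPE_0 note as parsed above,
 * using an illustrative aarch64 BTI property (ELF_GNU_PROPERTY_ALIGN == 8):
 *
 *     n_namesz  = 4                          "GNU\0"
 *     n_descsz  = 16
 *     n_type    = NT_GNU_PROPERTY_TYPE_0
 *     name      = "GNU\0"                    note.data[3] == GNU0_MAGIC
 *     pr_type   = GNU_PROPERTY_AARCH64_FEATURE_1_AND
 *     pr_datasz = 4
 *     pr_data   = GNU_PROPERTY_AARCH64_FEATURE_1_BTI
 *     pad       = 4 bytes, to the 8-byte property alignment
 */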
2209 
2210 /**
2211  * load_elf_image: Load an ELF image into the address space.
2212  * @image_name: the filename of the image, to use in error messages.
2213  * @src: the ImageSource from which to read.
2214  * @info: info collected from the loaded image.
2215  * @ehdr: the ELF header, not yet bswapped.
2216  * @pinterp_name: record any PT_INTERP string found.
2217  *
2218  * On return: @info values will be filled in, as necessary or available.
2219  */
2220 
2221 static void load_elf_image(const char *image_name, const ImageSource *src,
2222                            struct image_info *info, struct elfhdr *ehdr,
2223                            char **pinterp_name)
2224 {
2225     g_autofree struct elf_phdr *phdr = NULL;
2226     abi_ulong load_addr, load_bias, loaddr, hiaddr, error, align;
2227     size_t reserve_size, align_size;
2228     int i, prot_exec;
2229     Error *err = NULL;
2230 
2231     /*
2232      * First of all, some simple consistency checks.
2233      * Note that we rely on the bswapped ehdr staying in bprm_buf,
2234      * for later use by load_elf_binary and create_elf_tables.
2235      */
2236     if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) {
2237         goto exit_errmsg;
2238     }
2239     if (!elf_check_ident(ehdr)) {
2240         error_setg(&err, "Invalid ELF image for this architecture");
2241         goto exit_errmsg;
2242     }
2243     bswap_ehdr(ehdr);
2244     if (!elf_check_ehdr(ehdr)) {
2245         error_setg(&err, "Invalid ELF image for this architecture");
2246         goto exit_errmsg;
2247     }
2248 
2249     phdr = imgsrc_read_alloc(ehdr->e_phoff,
2250                              ehdr->e_phnum * sizeof(struct elf_phdr),
2251                              src, &err);
2252     if (phdr == NULL) {
2253         goto exit_errmsg;
2254     }
2255     bswap_phdr(phdr, ehdr->e_phnum);
2256 
2257     info->nsegs = 0;
2258     info->pt_dynamic_addr = 0;
2259 
2260     mmap_lock();
2261 
2262     /*
2263      * Find the maximum size of the image and allocate an appropriate
2264      * amount of memory to handle that.  Locate the interpreter, if any.
2265      */
2266     loaddr = -1, hiaddr = 0;
2267     align = 0;
2268     info->exec_stack = EXSTACK_DEFAULT;
2269     for (i = 0; i < ehdr->e_phnum; ++i) {
2270         struct elf_phdr *eppnt = phdr + i;
2271         if (eppnt->p_type == PT_LOAD) {
2272             abi_ulong a = eppnt->p_vaddr & TARGET_PAGE_MASK;
2273             if (a < loaddr) {
2274                 loaddr = a;
2275             }
2276             a = eppnt->p_vaddr + eppnt->p_memsz - 1;
2277             if (a > hiaddr) {
2278                 hiaddr = a;
2279             }
2280             ++info->nsegs;
2281             align |= eppnt->p_align;
2282         } else if (eppnt->p_type == PT_INTERP && pinterp_name) {
2283             g_autofree char *interp_name = NULL;
2284 
2285             if (*pinterp_name) {
2286                 error_setg(&err, "Multiple PT_INTERP entries");
2287                 goto exit_errmsg;
2288             }
2289 
2290             interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz,
2291                                             src, &err);
2292             if (interp_name == NULL) {
2293                 goto exit_errmsg;
2294             }
2295             if (interp_name[eppnt->p_filesz - 1] != 0) {
2296                 error_setg(&err, "Invalid PT_INTERP entry");
2297                 goto exit_errmsg;
2298             }
2299             *pinterp_name = g_steal_pointer(&interp_name);
2300         } else if (eppnt->p_type == PT_GNU_PROPERTY) {
2301             if (!parse_elf_properties(src, info, eppnt, &err)) {
2302                 goto exit_errmsg;
2303             }
2304         } else if (eppnt->p_type == PT_GNU_STACK) {
2305             info->exec_stack = eppnt->p_flags & PF_X;
2306         }
2307     }
2308 
2309     load_addr = loaddr;
2310 
2311     align = pow2ceil(align);
2312 
2313     if (pinterp_name != NULL) {
2314         if (ehdr->e_type == ET_EXEC) {
2315             /*
2316              * Make sure that the low address does not conflict with
2317              * MMAP_MIN_ADDR or the QEMU application itself.
2318              */
2319             probe_guest_base(image_name, loaddr, hiaddr);
2320         } else {
2321             /*
2322              * The binary is dynamic, but we still need to
2323              * select guest_base.  In this case we pass a size.
2324              */
2325             probe_guest_base(image_name, 0, hiaddr - loaddr);
2326 
2327             /*
2328              * Avoid collision with the loader by providing a different
2329              * default load address.
2330              */
2331             load_addr += elf_et_dyn_base;
2332 
2333             /*
2334              * TODO: Better support for mmap alignment is desirable.
2335              * Since we do not have complete control over the guest
2336              * address space, we prefer the kernel to choose some address
2337              * rather than force the use of LOAD_ADDR via MAP_FIXED.
2338              */
2339             if (align) {
2340                 load_addr &= -align;
2341             }
2342         }
2343     }
2344 
2345     /*
2346      * Reserve address space for all of this.
2347      *
2348      * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get
2349      * exactly the address range that is required.  Without reserved_va,
2350      * the guest address space is not isolated.  We have attempted to avoid
2351      * conflict with the host program itself via probe_guest_base, but using
2352      * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check.
2353      *
2354      * Otherwise this is ET_DYN, and we are searching for a location
2355      * that can hold the memory space required.  If the image is
2356      * pre-linked, LOAD_ADDR will be non-zero, and the kernel should
2357      * honor that address if it happens to be free.
2358      *
2359      * In both cases, we will overwrite pages in this range with mappings
2360      * from the executable.
2361      */
2362     reserve_size = (size_t)hiaddr - loaddr + 1;
2363     align_size = reserve_size;
2364 
2365     if (ehdr->e_type != ET_EXEC && align > qemu_real_host_page_size()) {
2366         align_size += align - 1;
2367     }
2368 
2369     load_addr = target_mmap(load_addr, align_size, PROT_NONE,
2370                             MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
2371                             (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0),
2372                             -1, 0);
2373     if (load_addr == -1) {
2374         goto exit_mmap;
2375     }
2376 
2377     if (align_size != reserve_size) {
2378         abi_ulong align_addr = ROUND_UP(load_addr, align);
2379         abi_ulong align_end = TARGET_PAGE_ALIGN(align_addr + reserve_size);
2380         abi_ulong load_end = TARGET_PAGE_ALIGN(load_addr + align_size);
2381 
2382         if (align_addr != load_addr) {
2383             target_munmap(load_addr, align_addr - load_addr);
2384         }
2385         if (align_end != load_end) {
2386             target_munmap(align_end, load_end - align_end);
2387         }
2388         load_addr = align_addr;
2389     }
2390 
2391     load_bias = load_addr - loaddr;
2392 
2393     if (elf_is_fdpic(ehdr)) {
2394         struct elf32_fdpic_loadseg *loadsegs = info->loadsegs =
2395             g_malloc(sizeof(*loadsegs) * info->nsegs);
2396 
2397         for (i = 0; i < ehdr->e_phnum; ++i) {
2398             switch (phdr[i].p_type) {
2399             case PT_DYNAMIC:
2400                 info->pt_dynamic_addr = phdr[i].p_vaddr + load_bias;
2401                 break;
2402             case PT_LOAD:
2403                 loadsegs->addr = phdr[i].p_vaddr + load_bias;
2404                 loadsegs->p_vaddr = phdr[i].p_vaddr;
2405                 loadsegs->p_memsz = phdr[i].p_memsz;
2406                 ++loadsegs;
2407                 break;
2408             }
2409         }
2410     }
2411 
2412     info->load_bias = load_bias;
2413     info->code_offset = load_bias;
2414     info->data_offset = load_bias;
2415     info->load_addr = load_addr;
2416     info->entry = ehdr->e_entry + load_bias;
2417     info->start_code = -1;
2418     info->end_code = 0;
2419     info->start_data = -1;
2420     info->end_data = 0;
2421     /* Usual start for brk is after all sections of the main executable. */
2422     info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias);
2423     info->elf_flags = ehdr->e_flags;
2424 
2425     prot_exec = PROT_EXEC;
2426 #ifdef TARGET_AARCH64
2427     /*
2428      * If the BTI feature is present, this indicates that the executable
2429      * pages of the startup binary should be mapped with PROT_BTI, so that
2430      * branch targets are enforced.
2431      *
2432      * The startup binary is either the interpreter or the static executable.
2433      * The interpreter is responsible for all pages of a dynamic executable.
2434      *
2435      * Elf notes are backward compatible to older cpus.
2436      * Do not enable BTI unless it is supported.
2437      */
2438     if ((info->note_flags & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
2439         && (pinterp_name == NULL || *pinterp_name == 0)
2440         && cpu_isar_feature(aa64_bti, ARM_CPU(thread_cpu))) {
2441         prot_exec |= TARGET_PROT_BTI;
2442     }
2443 #endif
2444 
2445     for (i = 0; i < ehdr->e_phnum; i++) {
2446         struct elf_phdr *eppnt = phdr + i;
2447         if (eppnt->p_type == PT_LOAD) {
2448             abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
2449             int elf_prot = 0;
2450 
2451             if (eppnt->p_flags & PF_R) {
2452                 elf_prot |= PROT_READ;
2453             }
2454             if (eppnt->p_flags & PF_W) {
2455                 elf_prot |= PROT_WRITE;
2456             }
2457             if (eppnt->p_flags & PF_X) {
2458                 elf_prot |= prot_exec;
2459             }
2460 
2461             vaddr = load_bias + eppnt->p_vaddr;
2462             vaddr_po = vaddr & ~TARGET_PAGE_MASK;
2463             vaddr_ps = vaddr & TARGET_PAGE_MASK;
2464 
2465             vaddr_ef = vaddr + eppnt->p_filesz;
2466             vaddr_em = vaddr + eppnt->p_memsz;
2467 
2468             /*
2469              * Some segments may be completely empty, with a non-zero p_memsz
2470              * but no backing file segment.
2471              */
2472             if (eppnt->p_filesz != 0) {
2473                 error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po,
2474                                     elf_prot, MAP_PRIVATE | MAP_FIXED,
2475                                     src, eppnt->p_offset - vaddr_po);
2476                 if (error == -1) {
2477                     goto exit_mmap;
2478                 }
2479             }
2480 
2481             /* If the load segment requests extra zeros (e.g. bss), map it. */
2482             if (vaddr_ef < vaddr_em &&
2483                 !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) {
2484                 goto exit_errmsg;
2485             }
2486 
2487             /* Find the full program boundaries.  */
2488             if (elf_prot & PROT_EXEC) {
2489                 if (vaddr < info->start_code) {
2490                     info->start_code = vaddr;
2491                 }
2492                 if (vaddr_ef > info->end_code) {
2493                     info->end_code = vaddr_ef;
2494                 }
2495             }
2496             if (elf_prot & PROT_WRITE) {
2497                 if (vaddr < info->start_data) {
2498                     info->start_data = vaddr;
2499                 }
2500                 if (vaddr_ef > info->end_data) {
2501                     info->end_data = vaddr_ef;
2502                 }
2503             }
2504 #ifdef TARGET_MIPS
2505         } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
2506             Mips_elf_abiflags_v0 abiflags;
2507 
2508             if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags),
2509                              src, &err)) {
2510                 goto exit_errmsg;
2511             }
2512             bswap_mips_abiflags(&abiflags);
2513             info->fp_abi = abiflags.fp_abi;
2514 #endif
2515         }
2516     }
2517 
2518     if (info->end_data == 0) {
2519         info->start_data = info->end_code;
2520         info->end_data = info->end_code;
2521     }
2522 
2523     if (qemu_log_enabled()) {
2524         load_symbols(ehdr, src, load_bias);
2525     }
2526 
2527     debuginfo_report_elf(image_name, src->fd, load_bias);
2528 
2529     mmap_unlock();
2530 
2531     close(src->fd);
2532     return;
2533 
2534  exit_mmap:
2535     error_setg_errno(&err, errno, "Error mapping file");
2536     goto exit_errmsg;
2537  exit_errmsg:
2538     error_reportf_err(err, "%s: ", image_name);
2539     exit(-1);
2540 }
2541 
2542 static void load_elf_interp(const char *filename, struct image_info *info,
2543                             char bprm_buf[BPRM_BUF_SIZE])
2544 {
2545     struct elfhdr ehdr;
2546     ImageSource src;
2547     int fd, retval;
2548     Error *err = NULL;
2549 
2550     fd = open(path(filename), O_RDONLY);
2551     if (fd < 0) {
2552         error_setg_file_open(&err, errno, filename);
2553         error_report_err(err);
2554         exit(-1);
2555     }
2556 
2557     retval = read(fd, bprm_buf, BPRM_BUF_SIZE);
2558     if (retval < 0) {
2559         error_setg_errno(&err, errno, "Error reading file header");
2560         error_reportf_err(err, "%s: ", filename);
2561         exit(-1);
2562     }
2563 
2564     src.fd = fd;
2565     src.cache = bprm_buf;
2566     src.cache_size = retval;
2567 
2568     load_elf_image(filename, &src, info, &ehdr, NULL);
2569 }
2570 
2571 #ifndef vdso_image_info
2572 #ifdef VDSO_HEADER
2573 #include VDSO_HEADER
2574 #define  vdso_image_info(flags)  &vdso_image_info
2575 #else
2576 #define  vdso_image_info(flags)  NULL
2577 #endif /* VDSO_HEADER */
2578 #endif /* vdso_image_info */
2579 
2580 static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso)
2581 {
2582     ImageSource src;
2583     struct elfhdr ehdr;
2584     abi_ulong load_bias, load_addr;
2585 
2586     src.fd = -1;
2587     src.cache = vdso->image;
2588     src.cache_size = vdso->image_size;
2589 
2590     load_elf_image("<internal-vdso>", &src, info, &ehdr, NULL);
2591     load_addr = info->load_addr;
2592     load_bias = info->load_bias;
2593 
2594     /*
2595      * We need to relocate the VDSO image.  The one built into the kernel
2596      * is built for a fixed address.  The one built for QEMU is not, since
2597      * that requires close control of the guest address space.
2598      * We pre-processed the image to locate all of the addresses that need
2599      * to be updated.
2600      */
2601     for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) {
2602         abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]);
2603         *addr = tswapal(tswapal(*addr) + load_bias);
2604     }
2605 
2606     /* Install signal trampolines, if present. */
2607     if (vdso->sigreturn_ofs) {
2608         default_sigreturn = load_addr + vdso->sigreturn_ofs;
2609     }
2610     if (vdso->rt_sigreturn_ofs) {
2611         default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs;
2612     }
2613 
2614     /* Remove write from VDSO segment. */
2615     target_mprotect(info->start_data, info->end_data - info->start_data,
2616                     PROT_READ | PROT_EXEC);
2617 }
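
/*
 * Sketch of one relocation (hypothetical numbers): if relocs[] contains
 * the image offset 0x3f0 and load_bias is 0x550000, the abi_ulong at
 * that offset is rewritten from, say, 0x800 to 0x550800, swapping to
 * and from guest byte order around the addition.
 */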
2618 
2619 static int symfind(const void *s0, const void *s1)
2620 {
2621     struct elf_sym *sym = (struct elf_sym *)s1;
2622     __typeof(sym->st_value) addr = *(uint64_t *)s0;
2623     int result = 0;
2624 
2625     if (addr < sym->st_value) {
2626         result = -1;
2627     } else if (addr >= sym->st_value + sym->st_size) {
2628         result = 1;
2629     }
2630     return result;
2631 }
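
/*
 * Note the asymmetric comparator: the bsearch key is a bare address while
 * each element is the range [st_value, st_value + st_size), so the search
 * finds the symbol whose range contains the address rather than an exact
 * match.  E.g. with { st_value = 0x1000, st_size = 0x40 }, any lookup in
 * 0x1000..0x103f lands on that symbol.
 */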
2632 
2633 static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
2634 {
2635 #if ELF_CLASS == ELFCLASS32
2636     struct elf_sym *syms = s->disas_symtab.elf32;
2637 #else
2638     struct elf_sym *syms = s->disas_symtab.elf64;
2639 #endif
2640 
2641     /* binary search */
2642     struct elf_sym *sym;
2643 
2644     sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), symfind);
2645     if (sym != NULL) {
2646         return s->disas_strtab + sym->st_name;
2647     }
2648 
2649     return "";
2650 }
2651 
2652 /* FIXME: This should use elf_ops.h.inc  */
2653 static int symcmp(const void *s0, const void *s1)
2654 {
2655     struct elf_sym *sym0 = (struct elf_sym *)s0;
2656     struct elf_sym *sym1 = (struct elf_sym *)s1;
2657     return (sym0->st_value < sym1->st_value)
2658         ? -1
2659         : ((sym0->st_value > sym1->st_value) ? 1 : 0);
2660 }
2661 
2662 /* Best attempt to load symbols from this ELF object. */
2663 static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
2664                          abi_ulong load_bias)
2665 {
2666     int i, shnum, nsyms, sym_idx = 0, str_idx = 0;
2667     g_autofree struct elf_shdr *shdr = NULL;
2668     char *strings = NULL;
2669     struct elf_sym *syms = NULL;
2670     struct elf_sym *new_syms;
2671     uint64_t segsz;
2672 
2673     shnum = hdr->e_shnum;
2674     shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr),
2675                              src, NULL);
2676     if (shdr == NULL) {
2677         return;
2678     }
2679 
2680     bswap_shdr(shdr, shnum);
2681     for (i = 0; i < shnum; ++i) {
2682         if (shdr[i].sh_type == SHT_SYMTAB) {
2683             sym_idx = i;
2684             str_idx = shdr[i].sh_link;
2685             goto found;
2686         }
2687     }
2688 
2689     /* There will be no symbol table if the file was stripped.  */
2690     return;
2691 
2692  found:
2693     /* Now we know where the strtab and symtab are.  Snarf them.  */
2694 
2695     segsz = shdr[str_idx].sh_size;
2696     strings = g_try_malloc(segsz);
2697     if (!strings) {
2698         goto give_up;
2699     }
2700     if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) {
2701         goto give_up;
2702     }
2703 
2704     segsz = shdr[sym_idx].sh_size;
2705     if (segsz / sizeof(struct elf_sym) > INT_MAX) {
2706         /*
2707          * Implausibly large symbol table: give up rather than ploughing
2708          * on with the number of symbols calculation overflowing.
2709          */
2710         goto give_up;
2711     }
2712     nsyms = segsz / sizeof(struct elf_sym);
2713     syms = g_try_malloc(segsz);
2714     if (!syms) {
2715         goto give_up;
2716     }
2717     if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) {
2718         goto give_up;
2719     }
2720 
2721     for (i = 0; i < nsyms; ) {
2722         bswap_sym(syms + i);
2723         /* Throw away entries which we do not need.  */
2724         if (syms[i].st_shndx == SHN_UNDEF
2725             || syms[i].st_shndx >= SHN_LORESERVE
2726             || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) {
2727             if (i < --nsyms) {
2728                 syms[i] = syms[nsyms];
2729             }
2730         } else {
2731 #if defined(TARGET_ARM) || defined (TARGET_MIPS)
2732             /* The bottom address bit marks a Thumb or MIPS16 symbol.  */
2733             syms[i].st_value &= ~(target_ulong)1;
2734 #endif
2735             syms[i].st_value += load_bias;
2736             i++;
2737         }
2738     }
2739 
2740     /* No "useful" symbol.  */
2741     if (nsyms == 0) {
2742         goto give_up;
2743     }
2744 
2745     /*
2746      * Attempt to free the storage associated with the local symbols
2747      * that we threw away.  Whether or not this has any effect on the
2748      * memory allocation depends on the malloc implementation and how
2749      * many symbols we managed to discard.
2750      */
2751     new_syms = g_try_renew(struct elf_sym, syms, nsyms);
2752     if (new_syms == NULL) {
2753         goto give_up;
2754     }
2755     syms = new_syms;
2756 
2757     qsort(syms, nsyms, sizeof(*syms), symcmp);
2758 
2759     {
2760         struct syminfo *s = g_new(struct syminfo, 1);
2761 
2762         s->disas_strtab = strings;
2763         s->disas_num_syms = nsyms;
2764 #if ELF_CLASS == ELFCLASS32
2765         s->disas_symtab.elf32 = syms;
2766 #else
2767         s->disas_symtab.elf64 = syms;
2768 #endif
2769         s->lookup_symbol = lookup_symbolxx;
2770         s->next = syminfos;
2771         syminfos = s;
2772     }
2773     return;
2774 
2775  give_up:
2776     g_free(strings);
2777     g_free(syms);
2778 }
2779 
2780 uint32_t get_elf_eflags(int fd)
2781 {
2782     struct elfhdr ehdr;
2783     off_t offset;
2784     int ret;
2785 
2786     /* Read ELF header */
2787     offset = lseek(fd, 0, SEEK_SET);
2788     if (offset == (off_t) -1) {
2789         return 0;
2790     }
2791     ret = read(fd, &ehdr, sizeof(ehdr));
2792     if (ret < 0 || (size_t)ret < sizeof(ehdr)) {
2793         return 0;
2794     }
2795     offset = lseek(fd, offset, SEEK_SET);
2796     if (offset == (off_t) -1) {
2797         return 0;
2798     }
2799 
2800     /* Check ELF signature */
2801     if (!elf_check_ident(&ehdr)) {
2802         return 0;
2803     }
2804 
2805     /* check header */
2806     bswap_ehdr(&ehdr);
2807     if (!elf_check_ehdr(&ehdr)) {
2808         return 0;
2809     }
2810 
2811     /* return architecture id */
2812     return ehdr.e_flags;
2813 }
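
/*
 * Hedged usage sketch (hypothetical caller): the file offset is restored
 * before returning, so an already-open binary can be inspected in place:
 *
 *     int fd = open(filename, O_RDONLY);
 *     uint32_t eflags = get_elf_eflags(fd);    (0 on any failure)
 *
 * MIPS uses this to select a CPU model matching the binary's FP/NaN ABI.
 */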
2814 
2815 int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
2816 {
2817     /*
2818      * We need a copy of the elf header for passing to create_elf_tables.
2819      * We will have overwritten the original when we re-use bprm->buf
2820      * while loading the interpreter.  Allocate the storage for this now
2821      * and let load_elf_image do any swapping that may be required.
2822      */
2823     struct elfhdr ehdr;
2824     struct image_info interp_info, vdso_info;
2825     char *elf_interpreter = NULL;
2826     char *scratch;
2827 
2828     memset(&interp_info, 0, sizeof(interp_info));
2829 #ifdef TARGET_MIPS
2830     interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN;
2831 #endif
2832 
2833     load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter);
2834 
2835     /* Do this so that we can load the interpreter, if need be.  We will
2836        change some of these later */
2837     bprm->p = setup_arg_pages(bprm, info);
2838 
2839     scratch = g_new0(char, TARGET_PAGE_SIZE);
2840     if (STACK_GROWS_DOWN) {
2841         bprm->p = copy_elf_strings(1, &bprm->filename, scratch,
2842                                    bprm->p, info->stack_limit);
2843         info->file_string = bprm->p;
2844         bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch,
2845                                    bprm->p, info->stack_limit);
2846         info->env_strings = bprm->p;
2847         bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch,
2848                                    bprm->p, info->stack_limit);
2849         info->arg_strings = bprm->p;
2850     } else {
2851         info->arg_strings = bprm->p;
2852         bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch,
2853                                    bprm->p, info->stack_limit);
2854         info->env_strings = bprm->p;
2855         bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch,
2856                                    bprm->p, info->stack_limit);
2857         info->file_string = bprm->p;
2858         bprm->p = copy_elf_strings(1, &bprm->filename, scratch,
2859                                    bprm->p, info->stack_limit);
2860     }
2861 
2862     g_free(scratch);
2863 
2864     if (!bprm->p) {
2865         fprintf(stderr, "%s: %s\n", bprm->filename, strerror(E2BIG));
2866         exit(-1);
2867     }
2868 
2869     if (elf_interpreter) {
2870         load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
2871 
2872         /*
2873          * While unusual because of ELF_ET_DYN_BASE, if we are unlucky
2874          * with the mappings the interpreter can be loaded above but
2875          * near the main executable, which can leave very little room
2876          * for the heap.
2877          * If there is less than 16 MiB between the current brk and the
2878          * interpreter, use the end of the interpreter instead.
2879          */
2880         if (interp_info.brk > info->brk &&
2881             interp_info.load_bias - info->brk < 16 * MiB)  {
2882             info->brk = interp_info.brk;
2883         }
2884 
2885         /* If the program interpreter is one of these two, then assume
2886            an iBCS2 image.  Otherwise assume a native linux image.  */
2887 
2888         if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0
2889             || strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0) {
2890             info->personality = PER_SVR4;
2891 
2892             /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
2893                and some applications "depend" upon this behavior.  Since
2894                we do not have the power to recompile these, we emulate
2895                the SVr4 behavior.  Sigh.  */
2896             target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC,
2897                         MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS,
2898                         -1, 0);
2899         }
2900 #ifdef TARGET_MIPS
2901         info->interp_fp_abi = interp_info.fp_abi;
2902 #endif
2903     }
2904 
2905     /*
2906      * Load a vdso if available, which will amongst other things contain the
2907      * signal trampolines.  Otherwise, allocate a separate page for them.
2908      */
2909     const VdsoImageInfo *vdso = vdso_image_info(info->elf_flags);
2910     if (vdso) {
2911         load_elf_vdso(&vdso_info, vdso);
2912         info->vdso = vdso_info.load_bias;
2913     } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
2914         abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE,
2915                                           PROT_READ | PROT_WRITE,
2916                                           MAP_PRIVATE | MAP_ANON, -1, 0);
2917         if (tramp_page == -1) {
2918             return -errno;
2919         }
2920 
2921         setup_sigtramp(tramp_page);
2922         target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC);
2923     }
2924 
2925     bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info,
2926                                 elf_interpreter ? &interp_info : NULL,
2927                                 vdso ? &vdso_info : NULL);
2928     info->start_stack = bprm->p;
2929 
2930     /* If we have an interpreter, set that as the program's entry point.
2931        Copy the load_bias as well, to help PPC64 interpret the entry
2932        point as a function descriptor.  Do this after creating elf tables
2933        so that we copy the original program entry point into the AUXV.  */
2934     if (elf_interpreter) {
2935         info->load_bias = interp_info.load_bias;
2936         info->entry = interp_info.entry;
2937         g_free(elf_interpreter);
2938     }
2939 
2940 #ifdef USE_ELF_CORE_DUMP
2941     bprm->core_dump = &elf_core_dump;
2942 #endif
2943 
2944     return 0;
2945 }
2946 
2947 #ifdef USE_ELF_CORE_DUMP
2948 
2949 /*
2950  * Definitions to generate Intel SVR4-like core files.
2951  * These mostly have the same names as the SVR4 types with "target_elf_"
2952  * tacked on the front to prevent clashes with linux definitions,
2953  * and the typedef forms have been avoided.  This is mostly like
2954  * the SVR4 structure, but more Linuxy, with things that Linux does
2955  * not support and which gdb doesn't really use excluded.
2956  *
2957  * Fields we don't dump (their contents are zero) in linux-user qemu
2958  * are marked with XXX.
2959  *
2960  * Core dump code is copied from linux kernel (fs/binfmt_elf.c).
2961  *
2962  * Porting ELF coredump to a target is a (quite) simple process.  First you
2963  * define USE_ELF_CORE_DUMP in the target ELF code (where init_thread() for
2964  * the target resides):
2965  *
2966  * #define USE_ELF_CORE_DUMP
2967  *
2968  * Next you define the type of register set used for dumping.  The ELF
2969  * specification says it needs to be an array of elf_greg_t of size ELF_NREG.
2970  *
2971  * typedef <target_regtype> target_elf_greg_t;
2972  * #define ELF_NREG <number of registers>
2973  * typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];
2974  *
2975  * The last step is to implement a target-specific function that copies
2976  * registers from the given cpu into the register set above.  Prototype is:
2977  *
2978  * static void elf_core_copy_regs(target_elf_gregset_t *regs,
2979  *                                const CPUArchState *env);
2980  *
2981  * Parameters:
2982  *     regs - copy register values into here (allocated and zeroed by caller)
2983  *     env - copy registers from here
2984  *
2985  * An example for the ARM target is provided in this file.
2986  */
2987 
2988 struct target_elf_siginfo {
2989     abi_int    si_signo; /* signal number */
2990     abi_int    si_code;  /* extra code */
2991     abi_int    si_errno; /* errno */
2992 };
2993 
2994 struct target_elf_prstatus {
2995     struct target_elf_siginfo pr_info;      /* Info associated with signal */
2996     abi_short          pr_cursig;    /* Current signal */
2997     abi_ulong          pr_sigpend;   /* XXX */
2998     abi_ulong          pr_sighold;   /* XXX */
2999     target_pid_t       pr_pid;
3000     target_pid_t       pr_ppid;
3001     target_pid_t       pr_pgrp;
3002     target_pid_t       pr_sid;
3003     struct target_timeval pr_utime;  /* XXX User time */
3004     struct target_timeval pr_stime;  /* XXX System time */
3005     struct target_timeval pr_cutime; /* XXX Cumulative user time */
3006     struct target_timeval pr_cstime; /* XXX Cumulative system time */
3007     target_elf_gregset_t      pr_reg;       /* GP registers */
3008     abi_int            pr_fpvalid;   /* XXX */
3009 };
3010 
3011 #define ELF_PRARGSZ     (80) /* Number of chars for args */
3012 
3013 struct target_elf_prpsinfo {
3014     char         pr_state;       /* numeric process state */
3015     char         pr_sname;       /* char for pr_state */
3016     char         pr_zomb;        /* zombie */
3017     char         pr_nice;        /* nice val */
3018     abi_ulong    pr_flag;        /* flags */
3019     target_uid_t pr_uid;
3020     target_gid_t pr_gid;
3021     target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
3022     /* Lots missing */
3023     char    pr_fname[16] QEMU_NONSTRING; /* filename of executable */
3024     char    pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
3025 };
3026 
3027 static void bswap_prstatus(struct target_elf_prstatus *prstatus)
3028 {
3029     if (!target_needs_bswap()) {
3030         return;
3031     }
3032 
3033     prstatus->pr_info.si_signo = tswap32(prstatus->pr_info.si_signo);
3034     prstatus->pr_info.si_code = tswap32(prstatus->pr_info.si_code);
3035     prstatus->pr_info.si_errno = tswap32(prstatus->pr_info.si_errno);
3036     prstatus->pr_cursig = tswap16(prstatus->pr_cursig);
3037     prstatus->pr_sigpend = tswapal(prstatus->pr_sigpend);
3038     prstatus->pr_sighold = tswapal(prstatus->pr_sighold);
3039     prstatus->pr_pid = tswap32(prstatus->pr_pid);
3040     prstatus->pr_ppid = tswap32(prstatus->pr_ppid);
3041     prstatus->pr_pgrp = tswap32(prstatus->pr_pgrp);
3042     prstatus->pr_sid = tswap32(prstatus->pr_sid);
3043     /* cpu times are not filled, so we skip them */
3044     /* regs should be in correct format already */
3045     prstatus->pr_fpvalid = tswap32(prstatus->pr_fpvalid);
3046 }
3047 
3048 static void bswap_psinfo(struct target_elf_prpsinfo *psinfo)
3049 {
3050     if (!target_needs_bswap()) {
3051         return;
3052     }
3053 
3054     psinfo->pr_flag = tswapal(psinfo->pr_flag);
3055     psinfo->pr_uid = tswap16(psinfo->pr_uid);
3056     psinfo->pr_gid = tswap16(psinfo->pr_gid);
3057     psinfo->pr_pid = tswap32(psinfo->pr_pid);
3058     psinfo->pr_ppid = tswap32(psinfo->pr_ppid);
3059     psinfo->pr_pgrp = tswap32(psinfo->pr_pgrp);
3060     psinfo->pr_sid = tswap32(psinfo->pr_sid);
3061 }
3062 
3063 static void bswap_note(struct elf_note *en)
3064 {
3065     if (!target_needs_bswap()) {
3066         return;
3067     }
3068 
3069     bswap32s(&en->n_namesz);
3070     bswap32s(&en->n_descsz);
3071     bswap32s(&en->n_type);
3072 }
3073 
3074 /*
3075  * Calculate file (dump) size of given memory region.
3076  */
3077 static size_t vma_dump_size(vaddr start, vaddr end, int flags)
3078 {
3079     /* The area must be readable. */
3080     if (!(flags & PAGE_READ)) {
3081         return 0;
3082     }
3083 
3084     /*
3085      * Usually we don't dump executable pages, as they contain
3086      * non-writable code that the debugger can read directly from the
3087      * target binary or library etc.  If there is no ELF header, we dump it.
3088      */
3089     if (!(flags & PAGE_WRITE_ORG) &&
3090         (flags & PAGE_EXEC) &&
3091         memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) {
3092         return 0;
3093     }
3094 
3095     return end - start;
3096 }
3097 
3098 static size_t size_note(const char *name, size_t datasz)
3099 {
3100     size_t namesz = strlen(name) + 1;
3101 
3102     namesz = ROUND_UP(namesz, 4);
3103     datasz = ROUND_UP(datasz, 4);
3104 
3105     return sizeof(struct elf_note) + namesz + datasz;
3106 }
3107 
3108 static void *fill_note(void **pptr, int type, const char *name, size_t datasz)
3109 {
3110     void *ptr = *pptr;
3111     struct elf_note *n = ptr;
3112     size_t namesz = strlen(name) + 1;
3113 
3114     n->n_namesz = namesz;
3115     n->n_descsz = datasz;
3116     n->n_type = type;
3117     bswap_note(n);
3118 
3119     ptr += sizeof(*n);
3120     memcpy(ptr, name, namesz);
3121 
3122     namesz = ROUND_UP(namesz, 4);
3123     datasz = ROUND_UP(datasz, 4);
3124 
3125     *pptr = ptr + namesz + datasz;
3126     return ptr + namesz;
3127 }
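
/*
 * Worked example of the sizing above: size_note("CORE", 148) for a
 * hypothetical 148-byte prstatus is sizeof(struct elf_note) +
 * ROUND_UP(5, 4) + ROUND_UP(148, 4) = 12 + 8 + 148 = 168 bytes, and
 * fill_note() returns a pointer just past "CORE\0" plus its padding,
 * i.e. 20 bytes into the note.
 */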
3128 
3129 static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine,
3130                             uint32_t flags)
3131 {
3132     memcpy(elf->e_ident, ELFMAG, SELFMAG);
3133 
3134     elf->e_ident[EI_CLASS] = ELF_CLASS;
3135     elf->e_ident[EI_DATA] = ELF_DATA;
3136     elf->e_ident[EI_VERSION] = EV_CURRENT;
3137     elf->e_ident[EI_OSABI] = ELF_OSABI;
3138 
3139     elf->e_type = ET_CORE;
3140     elf->e_machine = machine;
3141     elf->e_version = EV_CURRENT;
3142     elf->e_phoff = sizeof(struct elfhdr);
3143     elf->e_flags = flags;
3144     elf->e_ehsize = sizeof(struct elfhdr);
3145     elf->e_phentsize = sizeof(struct elf_phdr);
3146     elf->e_phnum = segs;
3147 
3148     bswap_ehdr(elf);
3149 }
3150 
3151 static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset)
3152 {
3153     phdr->p_type = PT_NOTE;
3154     phdr->p_offset = offset;
3155     phdr->p_filesz = sz;
3156 
3157     bswap_phdr(phdr, 1);
3158 }
3159 
3160 static void fill_prstatus_note(void *data, CPUState *cpu, int signr)
3161 {
3162     /*
3163      * Because note memory is only aligned to 4, and target_elf_prstatus
3164      * may well have higher alignment requirements, fill locally and
3165      * memcpy to the destination afterward.
3166      */
3167     struct target_elf_prstatus prstatus = {
3168         .pr_info.si_signo = signr,
3169         .pr_cursig = signr,
3170         .pr_pid = get_task_state(cpu)->ts_tid,
3171         .pr_ppid = getppid(),
3172         .pr_pgrp = getpgrp(),
3173         .pr_sid = getsid(0),
3174     };
3175 
3176     elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu));
3177     bswap_prstatus(&prstatus);
3178     memcpy(data, &prstatus, sizeof(prstatus));
3179 }
3180 
3181 static void fill_prpsinfo_note(void *data, const TaskState *ts)
3182 {
3183     /*
3184      * Because note memory is only aligned to 4, and target_elf_prpsinfo
3185      * may well have higher alignment requirements, fill locally and
3186      * memcpy to the destination afterward.
3187      */
3188     struct target_elf_prpsinfo psinfo = {
3189         .pr_pid = getpid(),
3190         .pr_ppid = getppid(),
3191         .pr_pgrp = getpgrp(),
3192         .pr_sid = getsid(0),
3193         .pr_uid = getuid(),
3194         .pr_gid = getgid(),
3195     };
3196     char *base_filename;
3197     size_t len;
3198 
3199     len = ts->info->env_strings - ts->info->arg_strings;
3200     len = MIN(len, ELF_PRARGSZ);
3201     memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len);
3202     for (size_t i = 0; i < len; i++) {
3203         if (psinfo.pr_psargs[i] == 0) {
3204             psinfo.pr_psargs[i] = ' ';
3205         }
3206     }
3207 
3208     base_filename = g_path_get_basename(ts->bprm->filename);
3209     /*
3210      * Using strncpy here is fine: at max-length,
3211      * this field is not NUL-terminated.
3212      */
3213     strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname));
3214     g_free(base_filename);
3215 
3216     bswap_psinfo(&psinfo);
3217     memcpy(data, &psinfo, sizeof(psinfo));
3218 }
3219 
3220 static void fill_auxv_note(void *data, const TaskState *ts)
3221 {
3222     memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len);
3223 }
3224 
3225 /*
3226  * Constructs the name of the coredump file.  We use the following convention
3227  * for the name:
3228  *     qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
3229  *
3230  * Returns the filename
3231  */
3232 static char *core_dump_filename(const TaskState *ts)
3233 {
3234     g_autoptr(GDateTime) now = g_date_time_new_now_local();
3235     g_autofree char *nowstr = g_date_time_format(now, "%Y%m%d-%H%M%S");
3236     g_autofree char *base_filename = g_path_get_basename(ts->bprm->filename);
3237 
3238     return g_strdup_printf("qemu_%s_%s_%d.core",
3239                            base_filename, nowstr, (int)getpid());
3240 }
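
/*
 * Example (hypothetical): /bin/ls dumped by pid 1234 on 2024-05-01 at
 * 12:00:00 local time yields "qemu_ls_20240501-120000_1234.core".
 */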
3241 
3242 static int dump_write(int fd, const void *ptr, size_t size)
3243 {
3244     const char *bufp = (const char *)ptr;
3245     ssize_t bytes_written, bytes_left;
3246 
3247     bytes_written = 0;
3248     bytes_left = size;
3249 
3250     /*
3251      * Under normal conditions a single write(2) should suffice, but
3252      * for sockets etc. this retry loop is more portable.
3253      */
3254     do {
3255         bytes_written = write(fd, bufp, bytes_left);
3256         if (bytes_written < 0) {
3257             if (errno == EINTR)
3258                 continue;
3259             return (-1);
3260         } else if (bytes_written == 0) { /* eof */
3261             return (-1);
3262         }
3263         bufp += bytes_written;
3264         bytes_left -= bytes_written;
3265     } while (bytes_left > 0);
3266 
3267     return (0);
3268 }
3269 
3270 static int wmr_page_unprotect_regions(void *opaque, vaddr start,
3271                                       vaddr end, int flags)
3272 {
3273     if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) {
3274         size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size());
3275 
3276         while (1) {
3277             page_unprotect(NULL, start, 0);
3278             if (end - start <= step) {
3279                 break;
3280             }
3281             start += step;
3282         }
3283     }
3284     return 0;
3285 }
3286 
3287 typedef struct {
3288     unsigned count;
3289     size_t size;
3290 } CountAndSizeRegions;
3291 
3292 static int wmr_count_and_size_regions(void *opaque, vaddr start,
3293                                       vaddr end, int flags)
3294 {
3295     CountAndSizeRegions *css = opaque;
3296 
3297     css->count++;
3298     css->size += vma_dump_size(start, end, flags);
3299     return 0;
3300 }
3301 
3302 typedef struct {
3303     struct elf_phdr *phdr;
3304     off_t offset;
3305 } FillRegionPhdr;
3306 
3307 static int wmr_fill_region_phdr(void *opaque, vaddr start,
3308                                 vaddr end, int flags)
3309 {
3310     FillRegionPhdr *d = opaque;
3311     struct elf_phdr *phdr = d->phdr;
3312 
3313     phdr->p_type = PT_LOAD;
3314     phdr->p_vaddr = start;
3315     phdr->p_paddr = 0;
3316     phdr->p_filesz = vma_dump_size(start, end, flags);
3317     phdr->p_offset = d->offset;
3318     d->offset += phdr->p_filesz;
3319     phdr->p_memsz = end - start;
3320     phdr->p_flags = (flags & PAGE_READ ? PF_R : 0)
3321                   | (flags & PAGE_WRITE_ORG ? PF_W : 0)
3322                   | (flags & PAGE_EXEC ? PF_X : 0);
3323     phdr->p_align = ELF_EXEC_PAGESIZE;
3324 
3325     bswap_phdr(phdr, 1);
3326     d->phdr = phdr + 1;
3327     return 0;
3328 }
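     /*
      * A worked example with hypothetical addresses: a read-execute region
      * at [0x00400000, 0x00401000) whose contents vma_dump_size() deems
      * not worth writing would come out as
      *
      *     p_type   PT_LOAD
      *     p_vaddr  0x00400000      p_paddr  0
      *     p_filesz 0               p_memsz  0x1000
      *     p_flags  PF_R | PF_X     p_align  ELF_EXEC_PAGESIZE
      *
      * so consumers still learn that the range existed even though no
      * bytes follow in the file (p_filesz < p_memsz is explicitly allowed).
      */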
3329 
3330 static int wmr_write_region(void *opaque, vaddr start,
3331                             vaddr end, int flags)
3332 {
3333     int fd = *(int *)opaque;
3334     size_t size = vma_dump_size(start, end, flags);
3335 
3336     if (!size) {
3337         return 0;
3338     }
3339     return dump_write(fd, g2h_untagged(start), size);
3340 }
3341 
3342 /*
3343  * Write out ELF coredump.
3344  *
3345  * See documentation of ELF object file format in:
3346  * http://www.caldera.com/developers/devspecs/gabi41.pdf
3347  *
3348  * The coredump format in Linux is as follows:
3349  *
3350  * 0   +----------------------+         \
3351  *     | ELF header           | ET_CORE  |
3352  *     +----------------------+          |
3353  *     | ELF program headers  |          |--- headers
3354  *     | - NOTE section       |          |
3355  *     | - PT_LOAD sections   |          |
3356  *     +----------------------+         /
3357  *     | NOTEs:               |
3358  *     | - NT_PRSTATUS        |
3359  * | - NT_PRPSINFO        |
3360  *     | - NT_AUXV            |
3361  *     +----------------------+ <-- aligned to target page
3362  *     | Process memory dump  |
3363  *     :                      :
3364  *     .                      .
3365  *     :                      :
3366  *     |                      |
3367  *     +----------------------+
3368  *
3369  * NT_PRSTATUS -> struct target_elf_prstatus (per thread)
3370  * NT_PRPSINFO -> struct target_elf_prpsinfo
3371  * NT_AUXV is an array of { type, value } pairs (see fill_auxv_note()).
3372  *
3373  * The format follows the System V format as closely as possible.
3374  * Current limitations are as follows:
3375  *     - no floating point registers are dumped
3376  *
3377  * Returns 0 on success, a negative errno value otherwise.
3378  *
3379  * TODO: make this work at runtime too: it should be possible to
3380  * force a coredump from a running process and then continue
3381  * processing.  For example, qemu could install a SIGUSR2 handler
3382  * (provided the target process hasn't registered a handler for
3383  * that signal) that performs the dump when the signal arrives.
3384  */
3385 static int elf_core_dump(int signr, const CPUArchState *env)
3386 {
3387     const CPUState *cpu = env_cpu_const(env);
3388     const TaskState *ts = (const TaskState *)get_task_state((CPUState *)cpu);
3389     struct rlimit dumpsize;
3390     CountAndSizeRegions css;
3391     off_t offset, note_offset, data_offset;
3392     size_t note_size;
3393     int cpus, ret;
3394     int fd = -1;
3395     CPUState *cpu_iter;
3396 
3397     if (prctl(PR_GET_DUMPABLE) == 0) {
3398         return 0;
3399     }
3400 
3401     if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) {
3402         return 0;
3403     }
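         /*
          * This mirrors kernel behaviour: "ulimit -c 0", a common default,
          * disables core dumps entirely, while a finite limit is enforced
          * against the projected file size further below, before anything
          * is written.
          */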
3404 
3405     cpu_list_lock();
3406     mmap_lock();
3407 
3408     /* By unprotecting, we merge vmas that might be split. */
3409     walk_memory_regions(NULL, wmr_page_unprotect_regions);
3410 
3411     /*
3412      * Walk through the target process' memory mappings and count
3413      * them, accumulating the total size of the regions to be dumped.
3414      */
3415     memset(&css, 0, sizeof(css));
3416     walk_memory_regions(&css, wmr_count_and_size_regions);
3417 
3418     cpus = 0;
3419     CPU_FOREACH(cpu_iter) {
3420         cpus++;
3421     }
3422 
3423     offset = sizeof(struct elfhdr);
3424     offset += (css.count + 1) * sizeof(struct elf_phdr);
3425     note_offset = offset;
3426 
3427     offset += size_note("CORE", ts->info->auxv_len);
3428     offset += size_note("CORE", sizeof(struct target_elf_prpsinfo));
3429     offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus;
3430     note_size = offset - note_offset;
3431     data_offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE);
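         /*
          * Worked example with hypothetical numbers for a 64-bit target:
          * ten regions give 64 (ehdr) + 11 * 56 (phdrs) = 680 bytes of
          * headers, the three note kinds might then push offset to, say,
          * 2000 bytes, and (assuming a 4 KiB ELF_EXEC_PAGESIZE)
          * data_offset becomes ROUND_UP(2000, 4096) = 4096, so the memory
          * image starts on a fresh target page as in the diagram above.
          */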
3432 
3433     /* Do not dump if the corefile size would exceed the limit. */
3434     if (dumpsize.rlim_cur != RLIM_INFINITY
3435         && dumpsize.rlim_cur < data_offset + css.size) {
3436         errno = 0;
3437         goto out;
3438     }
3439 
3440     {
3441         g_autofree char *corefile = core_dump_filename(ts);
3442         fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC,
3443                   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
3444     }
3445     if (fd < 0) {
3446         goto out;
3447     }
3448 
3449     /*
3450      * There is a fair amount of alignment padding within the notes
3451      * as well as preceding the process memory.  Allocate a zeroed
3452      * block to hold it all.  Write all of the headers directly into
3453      * this buffer and then write it out as a block.
3454      */
3455     {
3456         g_autofree void *header = g_malloc0(data_offset);
3457         FillRegionPhdr frp;
3458         void *hptr, *dptr;
3459 
3460         /* Create elf file header. */
3461         hptr = header;
3462         fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0);
3463         hptr += sizeof(struct elfhdr);
3464 
3465         /* Create elf program headers. */
3466         fill_elf_note_phdr(hptr, note_size, note_offset);
3467         hptr += sizeof(struct elf_phdr);
3468 
3469         frp.phdr = hptr;
3470         frp.offset = data_offset;
3471         walk_memory_regions(&frp, wmr_fill_region_phdr);
3472         hptr = frp.phdr;
3473 
3474         /* Create the notes. */
3475         dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len);
3476         fill_auxv_note(dptr, ts);
3477 
3478         dptr = fill_note(&hptr, NT_PRPSINFO, "CORE",
3479                          sizeof(struct target_elf_prpsinfo));
3480         fill_prpsinfo_note(dptr, ts);
3481 
3482         CPU_FOREACH(cpu_iter) {
3483             dptr = fill_note(&hptr, NT_PRSTATUS, "CORE",
3484                              sizeof(struct target_elf_prstatus));
3485             fill_prstatus_note(dptr, cpu_iter, cpu_iter == cpu ? signr : 0);
3486         }
3487 
3488         if (dump_write(fd, header, data_offset) < 0) {
3489             goto out;
3490         }
3491     }
3492 
3493     /*
3494      * Finally, write the process memory into the corefile as well.
3495      */
3496     if (walk_memory_regions(&fd, wmr_write_region) < 0) {
3497         goto out;
3498     }
3499     errno = 0;
3500 
3501  out:
3502     ret = -errno;
3503     mmap_unlock();
3504     cpu_list_unlock();
3505     if (fd >= 0) {
3506         close(fd);
3507     }
3508     return ret;
3509 }
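     /*
      * The resulting file can be sanity-checked with standard tooling:
      * "readelf -n" lists the NT_PRSTATUS/NT_PRPSINFO/NT_AUXV notes and
      * "readelf -l" the PT_NOTE and PT_LOAD program headers, while a gdb
      * built for the guest architecture can open it directly with
      * "gdb <guest-binary> <corefile>".
      */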
3510 #endif /* USE_ELF_CORE_DUMP */
3511 
3512 void do_init_main_thread(CPUState *cs, struct image_info *infop)
3513 {
3514 #ifdef HAVE_INIT_MAIN_THREAD
3515     init_main_thread(cs, infop);
3516 #else
3517     target_pt_regs regs = { };
3518 
3519     init_thread(&regs, infop);
3520     target_cpu_copy_regs(cpu_env(cs), &regs);
3521 #endif
3522 }
3523