xref: /openbmc/qemu/linux-user/mmap.c (revision 669dcb60)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 #include "target_mman.h"
26 
/*
 * Mutex backing mmap_lock()/mmap_unlock().  It is only taken when a
 * thread's nesting depth goes from 0 to 1, and only released when that
 * depth drops back to 0.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread nesting depth of mmap_lock() calls. */
static __thread int mmap_lock_count;
29 
30 void mmap_lock(void)
31 {
32     if (mmap_lock_count++ == 0) {
33         pthread_mutex_lock(&mmap_mutex);
34     }
35 }
36 
37 void mmap_unlock(void)
38 {
39     assert(mmap_lock_count > 0);
40     if (--mmap_lock_count == 0) {
41         pthread_mutex_unlock(&mmap_mutex);
42     }
43 }
44 
45 bool have_mmap_lock(void)
46 {
47     return mmap_lock_count > 0 ? true : false;
48 }
49 
50 /* Grab lock to make sure things are in a consistent state after fork().  */
51 void mmap_fork_start(void)
52 {
53     if (mmap_lock_count)
54         abort();
55     pthread_mutex_lock(&mmap_mutex);
56 }
57 
58 void mmap_fork_end(int child)
59 {
60     if (child) {
61         pthread_mutex_init(&mmap_mutex, NULL);
62     } else {
63         pthread_mutex_unlock(&mmap_mutex);
64     }
65 }
66 
67 /*
68  * Validate target prot bitmask.
69  * Return the prot bitmask for the host in *HOST_PROT.
70  * Return 0 if the target prot bitmask is invalid, otherwise
71  * the internal qemu page_flags (which will include PAGE_VALID).
72  */
73 static int validate_prot_to_pageflags(int prot)
74 {
75     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
76     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
77 
78 #ifdef TARGET_AARCH64
79     {
80         ARMCPU *cpu = ARM_CPU(thread_cpu);
81 
82         /*
83          * The PROT_BTI bit is only accepted if the cpu supports the feature.
84          * Since this is the unusual case, don't bother checking unless
85          * the bit has been requested.  If set and valid, record the bit
86          * within QEMU's page_flags.
87          */
88         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
89             valid |= TARGET_PROT_BTI;
90             page_flags |= PAGE_BTI;
91         }
92         /* Similarly for the PROT_MTE bit. */
93         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
94             valid |= TARGET_PROT_MTE;
95             page_flags |= PAGE_MTE;
96         }
97     }
98 #elif defined(TARGET_HPPA)
99     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
100 #endif
101 
102     return prot & ~valid ? 0 : page_flags;
103 }
104 
/*
 * Reduce a guest protection mask to what the host needs to see.
 *
 * Only read and write are ever passed to the host.  PROT_SEM is allowed
 * by all hosts but also ignored, so it is not translated.  Any other
 * target-specific bits would not be understood by the host and have to
 * be carried in page_flags for qemu's own emulation.
 *
 * Guest-executable pages are never executed by the host, but the host
 * must be able to read them, so PROT_EXEC becomes PROT_READ.
 */
static int target_to_host_prot(int prot)
{
    int host_prot = prot & (PROT_READ | PROT_WRITE);

    if (prot & PROT_EXEC) {
        host_prot |= PROT_READ;
    }
    return host_prot;
}
120 
121 /* NOTE: all the constants are the HOST ones, but addresses are target. */
122 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
123 {
124     abi_ulong starts[3];
125     abi_ulong lens[3];
126     int prots[3];
127     abi_ulong host_start, host_last, last;
128     int prot1, ret, page_flags, nranges;
129 
130     trace_target_mprotect(start, len, target_prot);
131 
132     if ((start & ~TARGET_PAGE_MASK) != 0) {
133         return -TARGET_EINVAL;
134     }
135     page_flags = validate_prot_to_pageflags(target_prot);
136     if (!page_flags) {
137         return -TARGET_EINVAL;
138     }
139     if (len == 0) {
140         return 0;
141     }
142     len = TARGET_PAGE_ALIGN(len);
143     if (!guest_range_valid_untagged(start, len)) {
144         return -TARGET_ENOMEM;
145     }
146 
147     last = start + len - 1;
148     host_start = start & qemu_host_page_mask;
149     host_last = HOST_PAGE_ALIGN(last) - 1;
150     nranges = 0;
151 
152     mmap_lock();
153 
154     if (host_last - host_start < qemu_host_page_size) {
155         /* Single host page contains all guest pages: sum the prot. */
156         prot1 = target_prot;
157         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
158             prot1 |= page_get_flags(a);
159         }
160         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
161             prot1 |= page_get_flags(a + 1);
162         }
163         starts[nranges] = host_start;
164         lens[nranges] = qemu_host_page_size;
165         prots[nranges] = prot1;
166         nranges++;
167     } else {
168         if (host_start < start) {
169             /* Host page contains more than one guest page: sum the prot. */
170             prot1 = target_prot;
171             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
172                 prot1 |= page_get_flags(a);
173             }
174             /* If the resulting sum differs, create a new range. */
175             if (prot1 != target_prot) {
176                 starts[nranges] = host_start;
177                 lens[nranges] = qemu_host_page_size;
178                 prots[nranges] = prot1;
179                 nranges++;
180                 host_start += qemu_host_page_size;
181             }
182         }
183 
184         if (last < host_last) {
185             /* Host page contains more than one guest page: sum the prot. */
186             prot1 = target_prot;
187             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
188                 prot1 |= page_get_flags(a + 1);
189             }
190             /* If the resulting sum differs, create a new range. */
191             if (prot1 != target_prot) {
192                 host_last -= qemu_host_page_size;
193                 starts[nranges] = host_last + 1;
194                 lens[nranges] = qemu_host_page_size;
195                 prots[nranges] = prot1;
196                 nranges++;
197             }
198         }
199 
200         /* Create a range for the middle, if any remains. */
201         if (host_start < host_last) {
202             starts[nranges] = host_start;
203             lens[nranges] = host_last - host_start + 1;
204             prots[nranges] = target_prot;
205             nranges++;
206         }
207     }
208 
209     for (int i = 0; i < nranges; ++i) {
210         ret = mprotect(g2h_untagged(starts[i]), lens[i],
211                        target_to_host_prot(prots[i]));
212         if (ret != 0) {
213             goto error;
214         }
215     }
216 
217     page_set_flags(start, last, page_flags);
218     ret = 0;
219 
220  error:
221     mmap_unlock();
222     return ret;
223 }
224 
225 /* map an incomplete host page */
226 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
227                       int prot, int flags, int fd, off_t offset)
228 {
229     abi_ulong real_last;
230     void *host_start;
231     int prot_old, prot_new;
232     int host_prot_old, host_prot_new;
233 
234     if (!(flags & MAP_ANONYMOUS)
235         && (flags & MAP_TYPE) == MAP_SHARED
236         && (prot & PROT_WRITE)) {
237         /*
238          * msync() won't work with the partial page, so we return an
239          * error if write is possible while it is a shared mapping.
240          */
241         errno = EINVAL;
242         return false;
243     }
244 
245     real_last = real_start + qemu_host_page_size - 1;
246     host_start = g2h_untagged(real_start);
247 
248     /* Get the protection of the target pages outside the mapping. */
249     prot_old = 0;
250     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
251         prot_old |= page_get_flags(a);
252     }
253     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
254         prot_old |= page_get_flags(a);
255     }
256 
257     if (prot_old == 0) {
258         /*
259          * Since !(prot_old & PAGE_VALID), there were no guest pages
260          * outside of the fragment we need to map.  Allocate a new host
261          * page to cover, discarding whatever else may have been present.
262          */
263         void *p = mmap(host_start, qemu_host_page_size,
264                        target_to_host_prot(prot),
265                        flags | MAP_ANONYMOUS, -1, 0);
266         if (p != host_start) {
267             if (p != MAP_FAILED) {
268                 munmap(p, qemu_host_page_size);
269                 errno = EEXIST;
270             }
271             return false;
272         }
273         prot_old = prot;
274     }
275     prot_new = prot | prot_old;
276 
277     host_prot_old = target_to_host_prot(prot_old);
278     host_prot_new = target_to_host_prot(prot_new);
279 
280     /* Adjust protection to be able to write. */
281     if (!(host_prot_old & PROT_WRITE)) {
282         host_prot_old |= PROT_WRITE;
283         mprotect(host_start, qemu_host_page_size, host_prot_old);
284     }
285 
286     /* Read or zero the new guest pages. */
287     if (flags & MAP_ANONYMOUS) {
288         memset(g2h_untagged(start), 0, last - start + 1);
289     } else {
290         if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
291             return false;
292         }
293     }
294 
295     /* Put final protection */
296     if (host_prot_new != host_prot_old) {
297         mprotect(host_start, qemu_host_page_size, host_prot_new);
298     }
299     return true;
300 }
301 
/*
 * mmap_find_vma() only advances mmap_next_start for results at or above
 * this guest address.
 */
abi_ulong task_unmapped_base;
/*
 * Not referenced by the code visible here.
 * NOTE(review): presumably the load base for ET_DYN images -- confirm
 * against the ELF loader.
 */
abi_ulong elf_et_dyn_base;
/*
 * Default search start used by mmap_find_vma() when it is called with
 * start == 0; moved past each successful default-start allocation.
 */
abi_ulong mmap_next_start;
305 
306 /*
307  * Subroutine of mmap_find_vma, used when we have pre-allocated
308  * a chunk of guest address space.
309  */
310 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
311                                         abi_ulong align)
312 {
313     target_ulong ret;
314 
315     ret = page_find_range_empty(start, reserved_va, size, align);
316     if (ret == -1 && start > mmap_min_addr) {
317         /* Restart at the beginning of the address space. */
318         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
319     }
320 
321     return ret;
322 }
323 
324 /*
325  * Find and reserve a free memory area of size 'size'. The search
326  * starts at 'start'.
327  * It must be called with mmap_lock() held.
328  * Return -1 if error.
329  */
330 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
331 {
332     void *ptr, *prev;
333     abi_ulong addr;
334     int wrapped, repeat;
335 
336     align = MAX(align, qemu_host_page_size);
337 
338     /* If 'start' == 0, then a default start address is used. */
339     if (start == 0) {
340         start = mmap_next_start;
341     } else {
342         start &= qemu_host_page_mask;
343     }
344     start = ROUND_UP(start, align);
345 
346     size = HOST_PAGE_ALIGN(size);
347 
348     if (reserved_va) {
349         return mmap_find_vma_reserved(start, size, align);
350     }
351 
352     addr = start;
353     wrapped = repeat = 0;
354     prev = 0;
355 
356     for (;; prev = ptr) {
357         /*
358          * Reserve needed memory area to avoid a race.
359          * It should be discarded using:
360          *  - mmap() with MAP_FIXED flag
361          *  - mremap() with MREMAP_FIXED flag
362          *  - shmat() with SHM_REMAP flag
363          */
364         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
365                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
366 
367         /* ENOMEM, if host address space has no memory */
368         if (ptr == MAP_FAILED) {
369             return (abi_ulong)-1;
370         }
371 
372         /*
373          * Count the number of sequential returns of the same address.
374          * This is used to modify the search algorithm below.
375          */
376         repeat = (ptr == prev ? repeat + 1 : 0);
377 
378         if (h2g_valid(ptr + size - 1)) {
379             addr = h2g(ptr);
380 
381             if ((addr & (align - 1)) == 0) {
382                 /* Success.  */
383                 if (start == mmap_next_start && addr >= task_unmapped_base) {
384                     mmap_next_start = addr + size;
385                 }
386                 return addr;
387             }
388 
389             /* The address is not properly aligned for the target.  */
390             switch (repeat) {
391             case 0:
392                 /*
393                  * Assume the result that the kernel gave us is the
394                  * first with enough free space, so start again at the
395                  * next higher target page.
396                  */
397                 addr = ROUND_UP(addr, align);
398                 break;
399             case 1:
400                 /*
401                  * Sometimes the kernel decides to perform the allocation
402                  * at the top end of memory instead.
403                  */
404                 addr &= -align;
405                 break;
406             case 2:
407                 /* Start over at low memory.  */
408                 addr = 0;
409                 break;
410             default:
411                 /* Fail.  This unaligned block must the last.  */
412                 addr = -1;
413                 break;
414             }
415         } else {
416             /*
417              * Since the result the kernel gave didn't fit, start
418              * again at low memory.  If any repetition, fail.
419              */
420             addr = (repeat ? -1 : 0);
421         }
422 
423         /* Unmap and try again.  */
424         munmap(ptr, size);
425 
426         /* ENOMEM if we checked the whole of the target address space.  */
427         if (addr == (abi_ulong)-1) {
428             return (abi_ulong)-1;
429         } else if (addr == 0) {
430             if (wrapped) {
431                 return (abi_ulong)-1;
432             }
433             wrapped = 1;
434             /*
435              * Don't actually use 0 when wrapping, instead indicate
436              * that we'd truly like an allocation in low memory.
437              */
438             addr = (mmap_min_addr > TARGET_PAGE_SIZE
439                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
440                      : TARGET_PAGE_SIZE);
441         } else if (wrapped && addr >= start) {
442             return (abi_ulong)-1;
443         }
444     }
445 }
446 
/*
 * Emulate mmap() for the guest.
 *
 * NOTE: all the constants are the HOST ones
 *
 * The whole operation runs under mmap_lock().  Without MAP_FIXED or
 * MAP_FIXED_NOREPLACE an address is chosen with mmap_find_vma() and the
 * mapping is created there.  With a fixed address, partially covered
 * host pages at either end are handled by mmap_frag() and the remaining
 * whole host pages are mapped directly.
 *
 * Returns the guest address of the mapping on success, or -1 with errno
 * set on failure.
 */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    /*
     * Range mapped directly by the host with the guest's own arguments.
     * start > last (the initial state) means "no such range".
     */
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    /* Round the address and file offset down to host page boundaries. */
    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmaping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page-size than the hosts, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the targets page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF?.  */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at eof aligned with
             * the hosts real pagesize. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        /* Recompute: len may have been truncated at EOF above. */
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            /* Overlay the file on top of the anonymous mapping. */
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host host mmap() handles this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            /*
             * Create a writable private anonymous mapping at the same
             * place (recursively), then fill it from the file.
             */
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            /* Drop the temporary write permission if it was not requested. */
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            /* Page flags were already set by the recursive call. */
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                /* Mapped, but not where it was asked for: undo it. */
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        /* No passthrough range: plain flags for the whole mapping. */
        page_set_flags(start, last, page_flags);
    } else {
        /* Only the passthrough sub-range gets PAGE_PASSTHROUGH. */
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
739 
740 static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
741 {
742     abi_ulong real_start;
743     abi_ulong real_last;
744     abi_ulong real_len;
745     abi_ulong last;
746     abi_ulong a;
747     void *host_start;
748     int prot;
749 
750     last = start + len - 1;
751     real_start = start & qemu_host_page_mask;
752     real_last = HOST_PAGE_ALIGN(last) - 1;
753 
754     /*
755      * If guest pages remain on the first or last host pages,
756      * adjust the deallocation to retain those guest pages.
757      * The single page special case is required for the last page,
758      * lest real_start overflow to zero.
759      */
760     if (real_last - real_start < qemu_host_page_size) {
761         prot = 0;
762         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
763             prot |= page_get_flags(a);
764         }
765         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
766             prot |= page_get_flags(a + 1);
767         }
768         if (prot != 0) {
769             return;
770         }
771     } else {
772         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
773             prot |= page_get_flags(a);
774         }
775         if (prot != 0) {
776             real_start += qemu_host_page_size;
777         }
778 
779         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
780             prot |= page_get_flags(a + 1);
781         }
782         if (prot != 0) {
783             real_last -= qemu_host_page_size;
784         }
785 
786         if (real_last < real_start) {
787             return;
788         }
789     }
790 
791     real_len = real_last - real_start + 1;
792     host_start = g2h_untagged(real_start);
793 
794     if (reserved_va) {
795         void *ptr = mmap(host_start, real_len, PROT_NONE,
796                          MAP_FIXED | MAP_ANONYMOUS
797                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
798         assert(ptr == host_start);
799     } else {
800         int ret = munmap(host_start, real_len);
801         assert(ret == 0);
802     }
803 }
804 
805 int target_munmap(abi_ulong start, abi_ulong len)
806 {
807     trace_target_munmap(start, len);
808 
809     if (start & ~TARGET_PAGE_MASK) {
810         return -TARGET_EINVAL;
811     }
812     len = TARGET_PAGE_ALIGN(len);
813     if (len == 0 || !guest_range_valid_untagged(start, len)) {
814         return -TARGET_EINVAL;
815     }
816 
817     mmap_lock();
818     mmap_reserve_or_unmap(start, len);
819     page_set_flags(start, start + len - 1, 0);
820     mmap_unlock();
821 
822     return 0;
823 }
824 
825 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
826                        abi_ulong new_size, unsigned long flags,
827                        abi_ulong new_addr)
828 {
829     int prot;
830     void *host_addr;
831 
832     if (!guest_range_valid_untagged(old_addr, old_size) ||
833         ((flags & MREMAP_FIXED) &&
834          !guest_range_valid_untagged(new_addr, new_size)) ||
835         ((flags & MREMAP_MAYMOVE) == 0 &&
836          !guest_range_valid_untagged(old_addr, new_size))) {
837         errno = ENOMEM;
838         return -1;
839     }
840 
841     mmap_lock();
842 
843     if (flags & MREMAP_FIXED) {
844         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
845                            flags, g2h_untagged(new_addr));
846 
847         if (reserved_va && host_addr != MAP_FAILED) {
848             /*
849              * If new and old addresses overlap then the above mremap will
850              * already have failed with EINVAL.
851              */
852             mmap_reserve_or_unmap(old_addr, old_size);
853         }
854     } else if (flags & MREMAP_MAYMOVE) {
855         abi_ulong mmap_start;
856 
857         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
858 
859         if (mmap_start == -1) {
860             errno = ENOMEM;
861             host_addr = MAP_FAILED;
862         } else {
863             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
864                                flags | MREMAP_FIXED,
865                                g2h_untagged(mmap_start));
866             if (reserved_va) {
867                 mmap_reserve_or_unmap(old_addr, old_size);
868             }
869         }
870     } else {
871         int prot = 0;
872         if (reserved_va && old_size < new_size) {
873             abi_ulong addr;
874             for (addr = old_addr + old_size;
875                  addr < old_addr + new_size;
876                  addr++) {
877                 prot |= page_get_flags(addr);
878             }
879         }
880         if (prot == 0) {
881             host_addr = mremap(g2h_untagged(old_addr),
882                                old_size, new_size, flags);
883 
884             if (host_addr != MAP_FAILED) {
885                 /* Check if address fits target address space */
886                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
887                     /* Revert mremap() changes */
888                     host_addr = mremap(g2h_untagged(old_addr),
889                                        new_size, old_size, flags);
890                     errno = ENOMEM;
891                     host_addr = MAP_FAILED;
892                 } else if (reserved_va && old_size > new_size) {
893                     mmap_reserve_or_unmap(old_addr + old_size,
894                                           old_size - new_size);
895                 }
896             }
897         } else {
898             errno = ENOMEM;
899             host_addr = MAP_FAILED;
900         }
901     }
902 
903     if (host_addr == MAP_FAILED) {
904         new_addr = -1;
905     } else {
906         new_addr = h2g(host_addr);
907         prot = page_get_flags(old_addr);
908         page_set_flags(old_addr, old_addr + old_size - 1, 0);
909         page_set_flags(new_addr, new_addr + new_size - 1,
910                        prot | PAGE_VALID | PAGE_RESET);
911     }
912     mmap_unlock();
913     return new_addr;
914 }
915 
916 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
917 {
918     abi_ulong len;
919     int ret = 0;
920 
921     if (start & ~TARGET_PAGE_MASK) {
922         return -TARGET_EINVAL;
923     }
924     if (len_in == 0) {
925         return 0;
926     }
927     len = TARGET_PAGE_ALIGN(len_in);
928     if (len == 0 || !guest_range_valid_untagged(start, len)) {
929         return -TARGET_EINVAL;
930     }
931 
932     /* Translate for some architectures which have different MADV_xxx values */
933     switch (advice) {
934     case TARGET_MADV_DONTNEED:      /* alpha */
935         advice = MADV_DONTNEED;
936         break;
937     case TARGET_MADV_WIPEONFORK:    /* parisc */
938         advice = MADV_WIPEONFORK;
939         break;
940     case TARGET_MADV_KEEPONFORK:    /* parisc */
941         advice = MADV_KEEPONFORK;
942         break;
943     /* we do not care about the other MADV_xxx values yet */
944     }
945 
946     /*
947      * Most advice values are hints, so ignoring and returning success is ok.
948      *
949      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
950      * MADV_KEEPONFORK are not hints and need to be emulated.
951      *
952      * A straight passthrough for those may not be safe because qemu sometimes
953      * turns private file-backed mappings into anonymous mappings.
954      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
955      * same semantics for the host as for the guest.
956      *
957      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
958      * return failure if not.
959      *
960      * MADV_DONTNEED is passed through as well, if possible.
961      * If passthrough isn't possible, we nevertheless (wrongly!) return
962      * success, which is broken but some userspace programs fail to work
963      * otherwise. Completely implementing such emulation is quite complicated
964      * though.
965      */
966     mmap_lock();
967     switch (advice) {
968     case MADV_WIPEONFORK:
969     case MADV_KEEPONFORK:
970         ret = -EINVAL;
971         /* fall through */
972     case MADV_DONTNEED:
973         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
974             ret = get_errno(madvise(g2h_untagged(start), len, advice));
975             if ((advice == MADV_DONTNEED) && (ret == 0)) {
976                 page_reset_target_data(start, start + len - 1);
977             }
978         }
979     }
980     mmap_unlock();
981 
982     return ret;
983 }
984