/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

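/*
 * The mmap lock is recursive per thread: only the outermost mmap_lock()
 * takes the pthread mutex and only the matching outermost mmap_unlock()
 * releases it.  An illustrative (hypothetical) call sequence:
 *
 *     mmap_lock();      // lock_count 0 -> 1, mutex acquired
 *     mmap_lock();      // lock_count 1 -> 2, mutex already held
 *     mmap_unlock();    // lock_count 2 -> 1, mutex still held
 *     mmap_unlock();    // lock_count 1 -> 0, mutex released
 */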
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming the guest bit to the host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
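
/*
 * Illustrative examples of the translation above:
 *
 *     guest PROT_READ | PROT_EXEC  -> host PROT_READ
 *     guest PROT_EXEC              -> host PROT_READ
 *     guest PROT_WRITE             -> host PROT_WRITE
 *     guest PROT_NONE              -> host PROT_NONE
 *
 * Guest execute permission never becomes host execute permission;
 * the translator only needs to be able to read the guest code pages.
 */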

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
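
/*
 * Worked example (hypothetical numbers): with a 64 KiB host page and
 * 4 KiB target pages, target_mprotect(0x14000, 0x28000, PROT_READ)
 * covers guest pages 0x14000..0x3bfff, i.e. host pages 0x10000..0x3ffff.
 * The head host page also holds guest pages 0x10000..0x13fff and the
 * tail host page also holds 0x3c000..0x3ffff, so those two host pages
 * are mprotect()ed with the OR of the new and the neighbouring guest
 * protections, while the middle host page 0x20000..0x2ffff gets exactly
 * target_to_host_prot(PROT_READ).  At most three host ranges result,
 * which is why starts[]/lens[]/prots[] have three entries.
 */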

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
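
/*
 * Illustrative scenario (hypothetical numbers): with a 64 KiB host page
 * and 4 KiB target pages, a mapping that starts at guest address 0x24000
 * only partially covers the host page 0x20000..0x2ffff.  mmap_frag()
 * keeps whatever guest pages already live in 0x20000..0x23fff, makes the
 * host page writable if necessary, then pread()s (or zeroes, for
 * MAP_ANONYMOUS) just the 0x24000..0x2ffff fragment and finally restores
 * the combined protection.
 */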

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve the needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping; instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
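
/*
 * Sketch of the retry strategy above (hypothetical numbers): if the guest
 * needs a 2 MiB-aligned block and the host first returns 0x40123000, the
 * probe is unmapped and retried at ROUND_UP(0x40123000, 2 MiB) = 0x40200000
 * (case 0).  If the host keeps handing back the same unaligned address,
 * the next attempts round down instead (case 1), restart from low memory
 * (case 2), and finally give up (default), so a run of identical unaligned
 * results eventually fails rather than looping forever.
 */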

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned with
             * the host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* Validate that the chosen range is empty. */
        if ((flags & MAP_FIXED_NOREPLACE)
            && !page_check_range_empty(start, last)) {
            errno = EEXIST;
            goto fail;
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it instead.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p;
            off_t offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_last - real_start + 1,
                     target_to_host_prot(target_prot), flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
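
/*
 * Example of the "worst case" path above (hypothetical numbers): with a
 * 64 KiB host page and 4 KiB target pages, a guest request such as
 * mmap(0x40001000, 0x2000, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0x3000)
 * has a file offset and an address that disagree modulo the host page
 * size, so the file cannot be mapped directly.  target_mmap() instead
 * creates an anonymous writable mapping, pread()s the file contents into
 * it, and then drops the write permission again with target_mprotect().
 */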

static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}
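
/*
 * Note on the reserved_va case above: when the whole guest address space
 * was reserved up front, the region is not returned to the host with
 * munmap().  It is instead replaced by a fresh PROT_NONE, MAP_NORESERVE
 * anonymous mapping, which discards the old contents while keeping the
 * host addresses reserved for future guest mappings.
 */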

int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail released by shrinking in place. */
                    mmap_reserve_or_unmap(old_addr + new_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}
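
/*
 * Summary of the three cases above: MREMAP_FIXED moves the mapping to a
 * caller-chosen guest address; MREMAP_MAYMOVE lets qemu pick a new guest
 * range with mmap_find_vma() and then forces the host mremap() to it with
 * MREMAP_FIXED; otherwise the mapping may only grow or shrink in place,
 * and, when reserved_va is in use, growing is refused if any guest page
 * already lies in the way.
 */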

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
970