/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

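/*
 * mmap_lock()/mmap_unlock() implement a per-thread recursive lock:
 * mmap_lock_count is thread-local, so only the outermost lock/unlock
 * pair in a thread touches the process-wide mmap_mutex.  For
 * illustration, a nested use is safe:
 *
 *     mmap_lock();     // count 0 -> 1, mutex acquired
 *     mmap_lock();     // count 1 -> 2, mutex not re-acquired
 *     mmap_unlock();   // count 2 -> 1, mutex still held
 *     mmap_unlock();   // count 1 -> 0, mutex released
 */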
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
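/*
 * Worked example (illustrative, assuming the PAGE_READ/WRITE/EXEC page
 * flags mirror the PROT_* bit values, as QEMU defines them): a guest
 * request of PROT_READ | PROT_EXEC yields
 *     *host_prot = PROT_READ                      (guest exec => host read)
 *     return     = PAGE_READ | PAGE_EXEC | PAGE_VALID
 * while any bit outside 'valid' makes the function return 0, which the
 * callers turn into EINVAL.
 */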
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming the guest bit to the host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
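/*
 * When TARGET_PAGE_SIZE is smaller than the host page size, a guest
 * mprotect() may cover only part of a host page.  The partially covered
 * host pages at the head and tail of the range receive the union of the
 * requested protection and the flags of the neighbouring guest pages
 * (so those pages keep working), while the fully covered middle is
 * mprotect()ed directly with host_prot.
 */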
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
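/*
 * The "fragment" is the part of a single host page that the guest
 * mapping covers.  That host page may already contain other guest pages
 * whose protections must survive, so a file-backed fragment is filled
 * in with pread() rather than mmap(), since a host-level mmap() would
 * replace the whole host page.
 */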
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
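/*
 * mmap_next_start is the search hint used by mmap_find_vma() for
 * kernel-chosen (non-MAP_FIXED) guest mappings.  It starts at the
 * per-target TASK_UNMAPPED_BASE above and is advanced as areas are
 * handed out.
 */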
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
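/*
 * The search walks downward one host page at a time from end_addr,
 * restarting the candidate window below any page found to be in use.
 * Underflow of 'addr' (detected as addr > end_addr) wraps the search
 * back to the top of the reserved area once, via 'looped', before
 * giving up.  Success means 'size' consecutive free bytes ending at
 * end_addr.
 */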
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
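/*
 * target_mmap() has two main paths.  Without MAP_FIXED it first asks
 * mmap_find_vma() for a free guest range and maps into it; with
 * MAP_FIXED it maps the fully covered host pages directly and uses
 * mmap_frag() for the partially covered host pages at either end.
 * [passthrough_start, passthrough_end) records the part of the guest
 * mapping that is backed one-to-one by a host mapping and is later
 * marked with PAGE_PASSTHROUGH.
 */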
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF, aligned to the host's
               real page size.  Additional anonymous maps will be created
               beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h_untagged(start), len, offset) == -1)
                goto fail;
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
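    /*
     * Pages in [passthrough_start, passthrough_end) are backed one-to-one
     * by the host mapping created above, so they also get PAGE_PASSTHROUGH;
     * can_passthrough_madv_dontneed() relies on that flag to decide when
     * MADV_DONTNEED may be forwarded to the host.  Pages outside that range
     * (e.g. fragments filled in via pread) get only page_flags.
     */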
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

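/*
 * With reserved_va, the guest address space is backed by a single large
 * PROT_NONE host reservation, so "unmapping" must not punch holes in it.
 * mmap_reserve() re-instates that PROT_NONE/MAP_NORESERVE mapping over
 * the range, skipping edge host pages that still contain live guest
 * pages.
 */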
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
                 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                 -1, 0);
    }
}

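/*
 * Only host pages fully covered by [start, start + len) are actually
 * unmapped (or re-reserved); a host page shared with live guest pages
 * at either edge is kept, and just has the guest flags for the unmapped
 * part cleared.
 */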
int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}

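/*
 * Three cases are handled below.  MREMAP_FIXED moves the mapping to the
 * caller-supplied new_addr; MREMAP_MAYMOVE lets QEMU pick the destination
 * with mmap_find_vma(); otherwise the mapping is resized in place, which
 * with reserved_va is only allowed when the pages being grown into are
 * currently unused.  With reserved_va, any range vacated by a move or
 * shrink is handed back to mmap_reserve().
 */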
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail vacated by shrinking in place. */
                    mmap_reserve(old_addr + new_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}

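/*
 * Passthrough of MADV_DONTNEED is only attempted when both ends of the
 * range are host-page aligned and every guest page in it is marked
 * PAGE_PASSTHROUGH, i.e. the host mapping really is the guest mapping,
 * so the host-side advice behaves as the guest expects.
 */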
static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, and implementing it completely is quite
     * complicated.  However, there is one piece of low-hanging fruit:
     * mappings that are known to have the same semantics in the host and
     * the guest.  In that case passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == TARGET_MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}