/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

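/*
 * The mmap lock is recursive per thread: mmap_lock_count is thread-local,
 * so only a thread's outermost mmap_lock()/mmap_unlock() pair touches the
 * underlying mutex, and nested calls within one thread are harmless.
 */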
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

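/*
 * After fork() only the calling thread survives in the child, and the
 * mutex it inherited is still held on its behalf.  The child therefore
 * re-initializes the mutex rather than unlocking it, while the parent
 * simply releases it.
 */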
void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bits to host bits.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#endif

    return prot & ~valid ? 0 : page_flags;
}
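/*
 * Illustrative example (assuming the usual identity between the PROT_*
 * and QEMU PAGE_* bit values): a guest PROT_READ | PROT_EXEC request
 * yields *host_prot == PROT_READ, because guest-executable pages are
 * only ever read by the translator, while the returned page_flags keep
 * PAGE_READ | PAGE_EXEC | PAGE_VALID so the emulation layer remembers
 * the guest's intent.
 */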

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

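    /*
     * When TARGET_PAGE_SIZE is smaller than the host page size, the host
     * pages containing 'start' and 'end' may also back guest pages outside
     * the requested range, so those two boundary host pages get the union
     * of the requested protection and the neighbouring pages' existing
     * protection.  E.g. with 4K host pages and 1K target pages, protecting
     * guest [0x1400, 0x1800) must not drop PROT_WRITE from guest page
     * 0x1000 if that page still has it.
     */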
    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

/*
 * Map an incomplete host page: the target range [start, end) lies within
 * the single host page beginning at real_start but does not cover it
 * completely.
 */
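/*
 * Illustrative example: with 4K host pages and 1K target pages, mapping
 * guest [0x1400, 0x1c00) from a file lands in the middle of the host page
 * at 0x1000.  mmap_frag() preserves whatever guest pages already live in
 * that host page, temporarily widens the host protection so the file data
 * can be pread() into place, then restores the combined protection.
 */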
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /*
         * msync() won't work here, so we return an error if write is
         * possible while it is a shared mapping.
         */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

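/*
 * Default guest address at which mmap_find_vma() starts searching when
 * the guest does not ask for a fixed address; presumably chosen per
 * target to stay clear of the guest executable, brk, and stack.
 */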
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
 * of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = NULL;

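    /*
     * Probe by asking the host kernel for a PROT_NONE mapping at each
     * candidate address and inspecting what it actually returns.  The
     * 'repeat' counter below tracks consecutive identical answers so the
     * search can adapt when the kernel keeps handing back the same
     * unsuitable block.
     */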
    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM if the host address space is exhausted.  */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * The address the kernel returned does not fit in the guest
             * address space; start again at low memory.  If this repeats,
             * fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmap'ing a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /*
         * Note: we prefer to control the mapping address, so we first
         * reserve the whole host range with an anonymous MAP_FIXED
         * mapping and then, for file mappings, overlay the file on top.
         * This is especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.  A host file mapping can only be placed
         * at a host-page-aligned file offset; if 'start' and 'offset'
         * differ in their sub-host-page phase, no host mmap() can satisfy
         * the request.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

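/*
 * When the guest address space is pre-reserved (reserved_va), unmapped
 * guest ranges are not returned to the host; they are replaced with a
 * fresh PROT_NONE reservation so the area stays claimed.  Host pages
 * still backing live guest pages at either end are left untouched.
 */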
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

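    /*
     * Host pages at either end that still back guest pages outside the
     * unmapped range must stay mapped, so they are trimmed from the host
     * munmap below.  The guest-visible flags for the full target range
     * are cleared regardless.
     */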
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}

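/*
 * Three cases, mirroring the host mremap() flags:
 *  - MREMAP_FIXED: move to the guest-supplied new_addr;
 *  - MREMAP_MAYMOVE: let mmap_find_vma() pick a new guest range, then
 *    force the host to it with MREMAP_FIXED;
 *  - neither: resize in place, which with reserved_va is only allowed
 *    when the pages beyond the old end are still unused.
 */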
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;

            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}

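/*
 * MADV_DONTNEED may be passed straight to the host only when the range is
 * host-page aligned and every page in it was mapped 1:1 with the host
 * (PAGE_PASSTHROUGH), i.e. QEMU did not substitute an anonymous mapping.
 */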
static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, and implementing it in full is quite
     * complicated.  However, there is one low-hanging fruit: mappings that
     * are known to have the same semantics in the host and the guest.  In
     * that case a passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == TARGET_MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}
929