/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}
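
/*
 * Usage note (illustrative, not part of the original file):
 * mmap_lock()/mmap_unlock() implement a recursive lock.  The global
 * mutex is only taken on a thread's outermost lock, because
 * mmap_lock_count is thread-local, so nested critical sections are
 * cheap:
 *
 *     mmap_lock();       // count 0 -> 1, takes mmap_mutex
 *     mmap_lock();       // count 1 -> 2, mutex already held
 *     mmap_unlock();     // count 2 -> 1, mutex still held
 *     mmap_unlock();     // count 1 -> 0, releases mmap_mutex
 *
 * Every caller must balance lock and unlock on the same thread.
 */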

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}
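
/*
 * Expected fork protocol (a sketch based on the two helpers above, not a
 * statement about the external call sites): the forking thread calls
 * mmap_fork_start() before fork() so that the mutex is held across the
 * fork and no other thread can leave the mmap state half-updated.  Both
 * parent and child then call mmap_fork_end(): the child re-initialises
 * the mutex to put it back into a known unlocked state, while the
 * parent simply unlocks it.
 */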

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#endif

    return prot & ~valid ? 0 : page_flags;
}
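
/*
 * Worked example (illustrative): for a guest request of
 * PROT_READ | PROT_EXEC, *host_prot becomes PROT_READ only, since guest
 * code is never executed directly by the host but must stay readable
 * for the translator; the returned page_flags are
 * PAGE_READ | PAGE_EXEC | PAGE_VALID.  A request containing any bit
 * outside 'valid' makes the function return 0, and the callers below
 * turn that into EINVAL.
 */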

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }
    page_set_flags(start, start + len, page_flags);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}
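
/*
 * Worked example (illustrative, assuming 64K host pages and 4K target
 * pages): mprotect'ing the guest range [0x11000, 0x13000) only touches
 * part of the host page at 0x10000.  The code above ORs the requested
 * protection with the flags of the other target pages sharing that host
 * page, so the host page keeps the union of permissions, while
 * page_set_flags() records the exact per-target-page protection that
 * the emulator enforces.
 */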

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}
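
/*
 * Illustrative summary of the fragment strategy above (no additional
 * behaviour): when a guest mapping covers only part of a host page, the
 * file contents cannot simply be mmap'ed, so the fragment is populated
 * with pread() into an anonymous host page instead.  For example, with
 * 64K host pages and 4K target pages, mapping a file at guest address
 * 0x11000 makes mmap_frag() allocate (or reuse) the host page at
 * 0x10000, temporarily add PROT_WRITE, pread() the file data into
 * [start, end), and then restore the combined protection.
 */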

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
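
/*
 * Search sketch (illustrative): with a pre-allocated guest address space
 * the host never needs to be consulted, only QEMU's own page flags.  The
 * loop above walks downward one host page at a time; the 'addr > end_addr'
 * test catches the unsigned wrap below zero, and finding a used page
 * restarts the candidate window immediately below that page.  Success
 * means every page in [addr, end_addr) had no flags set.
 */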

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
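
/*
 * Caller sketch (illustrative): a successful mmap_find_vma() leaves a
 * PROT_NONE placeholder mapping at the returned guest address, which the
 * caller is expected to replace immediately, e.g.
 *
 *     start = mmap_find_vma(0, host_len, TARGET_PAGE_SIZE);
 *     if (start != (abi_ulong)-1) {
 *         p = mmap(g2h_untagged(start), host_len, host_prot,
 *                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
 *     }
 *
 * This is the MAP_FIXED pattern target_mmap() uses below; the
 * placeholder is what prevents another host thread from grabbing the
 * range in between.
 */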

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size. Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles this
         * error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h_untagged(start), len, offset) == -1)
                goto fail;
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    page_set_flags(start, start + len, page_flags);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
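
/*
 * Usage sketch (illustrative; the real call site lives in the syscall
 * layer, not in this file): the guest's mmap(2)/mmap2(2) emulation is
 * expected to funnel into target_mmap() roughly as
 *
 *     abi_long ret = get_errno(target_mmap(addr, len, prot,
 *                                          flags, fd, offset));
 *
 * i.e. addresses and lengths are guest values, prot uses the target's
 * PROT_* encoding (validated by validate_prot_to_pageflags()), and a
 * return of -1 with errno set is converted into a -TARGET_Exxx value
 * for the guest.
 */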

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
                 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                 -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}
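
/*
 * Behaviour note (illustrative): a host page is only released when none
 * of the target pages inside it remain valid, so unmapping 4K out of a
 * 64K host page leaves the host mapping in place and merely clears
 * QEMU's page flags for that 4K.  With reserved_va the range is not
 * munmap'ed at all but re-reserved as PROT_NONE by mmap_reserve(), so
 * the guest address space stays owned by QEMU.
 */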

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail that the in-place shrink
                       just released.  */
                    mmap_reserve(old_addr + new_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}
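
/*
 * Summary of the three cases above (illustrative): MREMAP_FIXED is
 * passed straight through with translated addresses; MREMAP_MAYMOVE
 * first asks mmap_find_vma() for a free guest range and then forces the
 * host to move there with MREMAP_FIXED; a plain in-place resize is only
 * attempted when, under reserved_va, the pages directly after the old
 * mapping are unused, since growing into pages the guest already owns
 * would corrupt another mapping.
 */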

static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_ANON)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring it and returning success is ok.
     *
     * Ignoring it does break MADV_DONTNEED, but implementing that advice
     * completely is quite complicated.  However, there is one low-hanging
     * fruit: host-page-aligned anonymous mappings.  In that case
     * passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
    }
    mmap_unlock();

    return ret;
}
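
/*
 * Passthrough criteria recap (illustrative): the madvise() call above is
 * only forwarded to the host when the advice is MADV_DONTNEED, the range
 * is host-page aligned, and every target page in it is PAGE_ANON.  For
 * any other advice, or for file-backed or partial-page ranges, the hint
 * is silently accepted and the guest simply sees a return value of 0.
 */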
902