xref: /openbmc/qemu/linux-user/mmap.c (revision 9f22020b)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 
26 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
27 static __thread int mmap_lock_count;
28 
29 void mmap_lock(void)
30 {
31     if (mmap_lock_count++ == 0) {
32         pthread_mutex_lock(&mmap_mutex);
33     }
34 }
35 
36 void mmap_unlock(void)
37 {
38     if (--mmap_lock_count == 0) {
39         pthread_mutex_unlock(&mmap_mutex);
40     }
41 }
42 
43 bool have_mmap_lock(void)
44 {
45     return mmap_lock_count > 0;
46 }
47 
48 /* Grab lock to make sure things are in a consistent state after fork().  */
49 void mmap_fork_start(void)
50 {
51     if (mmap_lock_count)
52         abort();
53     pthread_mutex_lock(&mmap_mutex);
54 }
55 
56 void mmap_fork_end(int child)
57 {
58     if (child)
59         pthread_mutex_init(&mmap_mutex, NULL);
60     else
61         pthread_mutex_unlock(&mmap_mutex);
62 }
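/*
 * Typical call sequence around fork() (a sketch of the expected protocol,
 * not a call site in this file): the parent takes the mutex before forking
 * so that no other thread holds it across fork(), and both sides then
 * release it appropriately:
 *
 *     mmap_fork_start();
 *     pid = fork();
 *     if (pid == 0) {
 *         mmap_fork_end(1);    (child: reinitialize the mutex)
 *     } else {
 *         mmap_fork_end(0);    (parent: simply unlock)
 *     }
 */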
63 
64 /*
65  * Validate target prot bitmask.
66  * Return the prot bitmask for the host in *HOST_PROT.
67  * Return 0 if the target prot bitmask is invalid, otherwise
68  * the internal qemu page_flags (which will include PAGE_VALID).
69  */
70 static int validate_prot_to_pageflags(int *host_prot, int prot)
71 {
72     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
73     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
74 
75     /*
76      * For the host, we need not pass anything except read/write/exec.
77      * While PROT_SEM is allowed by all hosts, it is also ignored, so
78      * don't bother transforming the guest bit to the host bit.  Any other
79      * target-specific prot bits will not be understood by the host
80      * and will need to be encoded into page_flags for qemu emulation.
81      *
82      * Pages that are executable by the guest will never be executed
83      * by the host, but the host will need to be able to read them.
84      */
85     *host_prot = (prot & (PROT_READ | PROT_WRITE))
86                | (prot & PROT_EXEC ? PROT_READ : 0);
87 
88 #ifdef TARGET_AARCH64
89     {
90         ARMCPU *cpu = ARM_CPU(thread_cpu);
91 
92         /*
93          * The PROT_BTI bit is only accepted if the cpu supports the feature.
94          * Since this is the unusual case, don't bother checking unless
95          * the bit has been requested.  If set and valid, record the bit
96          * within QEMU's page_flags.
97          */
98         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
99             valid |= TARGET_PROT_BTI;
100             page_flags |= PAGE_BTI;
101         }
102         /* Similarly for the PROT_MTE bit. */
103         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
104             valid |= TARGET_PROT_MTE;
105             page_flags |= PAGE_MTE;
106         }
107     }
108 #endif
109 
110     return prot & ~valid ? 0 : page_flags;
111 }
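/*
 * Worked example (hypothetical guest request): for target_prot =
 * PROT_READ | PROT_EXEC on a guest without BTI/MTE, the function returns
 * PAGE_READ | PAGE_EXEC | PAGE_VALID and sets *host_prot to PROT_READ
 * only -- guest-executable pages are never executed natively, but the
 * translator must be able to read them.  A request carrying any bit
 * outside read/write/exec/PROT_SEM (plus BTI/MTE where supported) makes
 * the function return 0, which the callers below turn into EINVAL.
 */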
112 
113 /* NOTE: all the constants are the HOST ones, but addresses are target. */
114 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
115 {
116     abi_ulong end, host_start, host_end, addr;
117     int prot1, ret, page_flags, host_prot;
118 
119     trace_target_mprotect(start, len, target_prot);
120 
121     if ((start & ~TARGET_PAGE_MASK) != 0) {
122         return -TARGET_EINVAL;
123     }
124     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
125     if (!page_flags) {
126         return -TARGET_EINVAL;
127     }
128     len = TARGET_PAGE_ALIGN(len);
129     end = start + len;
130     if (!guest_range_valid_untagged(start, len)) {
131         return -TARGET_ENOMEM;
132     }
133     if (len == 0) {
134         return 0;
135     }
136 
137     mmap_lock();
138     host_start = start & qemu_host_page_mask;
139     host_end = HOST_PAGE_ALIGN(end);
140     if (start > host_start) {
141         /* handle host page containing start */
142         prot1 = host_prot;
143         for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
144             prot1 |= page_get_flags(addr);
145         }
146         if (host_end == host_start + qemu_host_page_size) {
147             for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
148                 prot1 |= page_get_flags(addr);
149             }
150             end = host_end;
151         }
152         ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
153                        prot1 & PAGE_BITS);
154         if (ret != 0) {
155             goto error;
156         }
157         host_start += qemu_host_page_size;
158     }
159     if (end < host_end) {
160         prot1 = host_prot;
161         for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
162             prot1 |= page_get_flags(addr);
163         }
164         ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
165                        qemu_host_page_size, prot1 & PAGE_BITS);
166         if (ret != 0) {
167             goto error;
168         }
169         host_end -= qemu_host_page_size;
170     }
171 
172     /* handle the pages in the middle */
173     if (host_start < host_end) {
174         ret = mprotect(g2h_untagged(host_start),
175                        host_end - host_start, host_prot);
176         if (ret != 0) {
177             goto error;
178         }
179     }
180 
181     page_set_flags(start, start + len, page_flags);
182     tb_invalidate_phys_range(start, start + len);
183     ret = 0;
184 
185 error:
186     mmap_unlock();
187     return ret;
188 }
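/*
 * Worked example (hypothetical page sizes): with 4 KiB target pages and
 * 64 KiB host pages, an mprotect of guest range [0x11000, 0x13000) covers
 * only part of host page [0x10000, 0x20000).  The fragment handling above
 * ORs the existing flags of the untouched 4 KiB pages into the protection
 * used for that host page, so the single host mprotect() cannot revoke
 * permissions that neighbouring guest pages still need; only host pages
 * fully covered by the request (none in this example) get exactly
 * host_prot.
 */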
189 
190 /* map an incomplete host page */
191 static int mmap_frag(abi_ulong real_start,
192                      abi_ulong start, abi_ulong end,
193                      int prot, int flags, int fd, abi_ulong offset)
194 {
195     abi_ulong real_end, addr;
196     void *host_start;
197     int prot1, prot_new;
198 
199     real_end = real_start + qemu_host_page_size;
200     host_start = g2h_untagged(real_start);
201 
202     /* get the protection of the target pages outside the mapping */
203     prot1 = 0;
204     for (addr = real_start; addr < real_end; addr++) {
205         if (addr < start || addr >= end)
206             prot1 |= page_get_flags(addr);
207     }
208 
209     if (prot1 == 0) {
210         /* no page was there, so we allocate one */
211         void *p = mmap(host_start, qemu_host_page_size, prot,
212                        flags | MAP_ANONYMOUS, -1, 0);
213         if (p == MAP_FAILED)
214             return -1;
215         prot1 = prot;
216     }
217     prot1 &= PAGE_BITS;
218 
219     prot_new = prot | prot1;
220     if (!(flags & MAP_ANONYMOUS)) {
221         /* msync() won't work here, so we return an error if the mapping
222            is both shared and writable */
223         if ((flags & MAP_TYPE) == MAP_SHARED &&
224             (prot & PROT_WRITE))
225             return -1;
226 
227         /* adjust protection to be able to read */
228         if (!(prot1 & PROT_WRITE))
229             mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
230 
231         /* read the corresponding file data */
232         if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
233             return -1;
234 
235         /* put final protection */
236         if (prot_new != (prot1 | PROT_WRITE))
237             mprotect(host_start, qemu_host_page_size, prot_new);
238     } else {
239         if (prot_new != prot1) {
240             mprotect(host_start, qemu_host_page_size, prot_new);
241         }
242         if (prot_new & PROT_WRITE) {
243             memset(g2h_untagged(start), 0, end - start);
244         }
245     }
246     return 0;
247 }
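/*
 * Illustration (hypothetical sizes and addresses): with 4 KiB target pages
 * and a 16 KiB host page size, a MAP_FIXED file mapping that the guest
 * places at 0x41000 only partially covers host page [0x40000, 0x44000).
 * For that leading host page, target_mmap() below calls
 * mmap_frag(0x40000, 0x41000, 0x44000, ...): the flags of the other target
 * pages within the host page are preserved (OR-ed into the new protection),
 * and the file data for [0x41000, 0x44000) is pread() into place rather
 * than mapped, since the rest of the host page may already be in use by an
 * unrelated guest mapping.
 */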
248 
249 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
250 #ifdef TARGET_AARCH64
251 # define TASK_UNMAPPED_BASE  0x5500000000
252 #else
253 # define TASK_UNMAPPED_BASE  (1ul << 38)
254 #endif
255 #else
256 #ifdef TARGET_HPPA
257 # define TASK_UNMAPPED_BASE  0xfa000000
258 #else
259 # define TASK_UNMAPPED_BASE  0x40000000
260 #endif
261 #endif
262 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
263 
264 unsigned long last_brk;
265 
266 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
267    of guest address space.  */
268 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
269                                         abi_ulong align)
270 {
271     abi_ulong addr, end_addr, incr = qemu_host_page_size;
272     int prot;
273     bool looped = false;
274 
275     if (size > reserved_va) {
276         return (abi_ulong)-1;
277     }
278 
279     /* Note that start and size have already been aligned by mmap_find_vma. */
280 
281     end_addr = start + size;
282     if (start > reserved_va - size) {
283         /* Start at the top of the address space.  */
284         end_addr = ((reserved_va - size) & -align) + size;
285         looped = true;
286     }
287 
288     /* Search downward from END_ADDR, checking to see if a page is in use.  */
289     addr = end_addr;
290     while (1) {
291         addr -= incr;
292         if (addr > end_addr) {
293             if (looped) {
294                 /* Failure.  The entire address space has been searched.  */
295                 return (abi_ulong)-1;
296             }
297             /* Re-start at the top of the address space.  */
298             addr = end_addr = ((reserved_va - size) & -align) + size;
299             looped = true;
300         } else {
301             prot = page_get_flags(addr);
302             if (prot) {
303                 /* Page in use.  Restart below this page.  */
304                 addr = end_addr = ((addr - size) & -align) + size;
305             } else if (addr && addr + size == end_addr) {
306                 /* Success!  All pages between ADDR and END_ADDR are free.  */
307                 if (start == mmap_next_start) {
308                     mmap_next_start = addr;
309                 }
310                 return addr;
311             }
312         }
313     }
314 }
315 
316 /*
317  * Find and reserve a free memory area of size 'size'. The search
318  * starts at 'start'.
319  * It must be called with mmap_lock() held.
320  * Return -1 if error.
321  */
322 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
323 {
324     void *ptr, *prev;
325     abi_ulong addr;
326     int wrapped, repeat;
327 
328     align = MAX(align, qemu_host_page_size);
329 
330     /* If 'start' == 0, then a default start address is used. */
331     if (start == 0) {
332         start = mmap_next_start;
333     } else {
334         start &= qemu_host_page_mask;
335     }
336     start = ROUND_UP(start, align);
337 
338     size = HOST_PAGE_ALIGN(size);
339 
340     if (reserved_va) {
341         return mmap_find_vma_reserved(start, size, align);
342     }
343 
344     addr = start;
345     wrapped = repeat = 0;
346     prev = 0;
347 
348     for (;; prev = ptr) {
349         /*
350          * Reserve the needed memory area to avoid a race.
351          * The reservation is later discarded by the caller using one of:
352          *  - mmap() with the MAP_FIXED flag
353          *  - mremap() with the MREMAP_FIXED flag
354          *  - shmat() with the SHM_REMAP flag
355          */
356         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
357                    MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
358 
359         /* ENOMEM if the host address space is exhausted */
360         if (ptr == MAP_FAILED) {
361             return (abi_ulong)-1;
362         }
363 
364         /* Count the number of sequential returns of the same address.
365            This is used to modify the search algorithm below.  */
366         repeat = (ptr == prev ? repeat + 1 : 0);
367 
368         if (h2g_valid(ptr + size - 1)) {
369             addr = h2g(ptr);
370 
371             if ((addr & (align - 1)) == 0) {
372                 /* Success.  */
373                 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
374                     mmap_next_start = addr + size;
375                 }
376                 return addr;
377             }
378 
379             /* The address is not properly aligned for the target.  */
380             switch (repeat) {
381             case 0:
382                 /* Assume the result that the kernel gave us is the
383                    first with enough free space, so start again at the
384                    next higher target page.  */
385                 addr = ROUND_UP(addr, align);
386                 break;
387             case 1:
388                 /* Sometimes the kernel decides to perform the allocation
389                    at the top end of memory instead.  */
390                 addr &= -align;
391                 break;
392             case 2:
393                 /* Start over at low memory.  */
394                 addr = 0;
395                 break;
396             default:
397                 /* Fail.  This unaligned block must be the last.  */
398                 addr = -1;
399                 break;
400             }
401         } else {
402             /* Since the result the kernel gave didn't fit, start
403                again at low memory.  If any repetition, fail.  */
404             addr = (repeat ? -1 : 0);
405         }
406 
407         /* Unmap and try again.  */
408         munmap(ptr, size);
409 
410         /* ENOMEM if we checked the whole of the target address space.  */
411         if (addr == (abi_ulong)-1) {
412             return (abi_ulong)-1;
413         } else if (addr == 0) {
414             if (wrapped) {
415                 return (abi_ulong)-1;
416             }
417             wrapped = 1;
418             /* Don't actually use 0 when wrapping, instead indicate
419                that we'd truly like an allocation in low memory.  */
420             addr = (mmap_min_addr > TARGET_PAGE_SIZE
421                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
422                      : TARGET_PAGE_SIZE);
423         } else if (wrapped && addr >= start) {
424             return (abi_ulong)-1;
425         }
426     }
427 }
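/*
 * Usage sketch (mirroring the callers later in this file): start == 0
 * requests the default search base, and the callers pass at least one
 * target page of alignment:
 *
 *     abi_ulong addr = mmap_find_vma(0, size, TARGET_PAGE_SIZE);
 *     if (addr == (abi_ulong)-1) {
 *         (no suitable hole; the caller reports ENOMEM)
 *     }
 *
 * Unless reserved_va is in use, the returned area is still backed by the
 * PROT_NONE reservation made above, which the caller is expected to
 * replace with a MAP_FIXED mapping (or MREMAP_FIXED / SHM_REMAP).
 */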
428 
429 /* NOTE: all the constants are the HOST ones */
430 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
431                      int flags, int fd, abi_ulong offset)
432 {
433     abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
434     int page_flags, host_prot;
435 
436     mmap_lock();
437     trace_target_mmap(start, len, target_prot, flags, fd, offset);
438 
439     if (!len) {
440         errno = EINVAL;
441         goto fail;
442     }
443 
444     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
445     if (!page_flags) {
446         errno = EINVAL;
447         goto fail;
448     }
449 
450     /* Also check for overflows... */
451     len = TARGET_PAGE_ALIGN(len);
452     if (!len) {
453         errno = ENOMEM;
454         goto fail;
455     }
456 
457     if (offset & ~TARGET_PAGE_MASK) {
458         errno = EINVAL;
459         goto fail;
460     }
461 
462     /*
463      * If we're mapping shared memory, ensure we generate code for parallel
464      * execution and flush old translations.  This will work up to the level
465      * supported by the host -- anything that requires EXCP_ATOMIC will not
466      * be atomic with respect to an external process.
467      */
468     if (flags & MAP_SHARED) {
469         CPUState *cpu = thread_cpu;
470         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
471             cpu->tcg_cflags |= CF_PARALLEL;
472             tb_flush(cpu);
473         }
474     }
475 
476     real_start = start & qemu_host_page_mask;
477     host_offset = offset & qemu_host_page_mask;
478 
479     /* If the user is asking for the kernel to find a location, do that
480        before we truncate the length for mapping files below.  */
481     if (!(flags & MAP_FIXED)) {
482         host_len = len + offset - host_offset;
483         host_len = HOST_PAGE_ALIGN(host_len);
484         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
485         if (start == (abi_ulong)-1) {
486             errno = ENOMEM;
487             goto fail;
488         }
489     }
490 
491     /* When mapping files into a memory area larger than the file, accesses
492        to pages beyond the file size will cause a SIGBUS.
493 
494        For example, if mmapping a file of 100 bytes on a host with 4K pages
495        emulating a target with 8K pages, the target expects to be able to
496        access the first 8K. But the host will trap us on any access beyond
497        4K.
498 
499        When emulating a target with a larger page size than the host's, we
500        may need to truncate file maps at EOF and add extra anonymous pages
501        up to the target's page boundary.  */
502 
503     if ((qemu_real_host_page_size() < qemu_host_page_size) &&
504         !(flags & MAP_ANONYMOUS)) {
505         struct stat sb;
506 
507         if (fstat(fd, &sb) == -1)
508             goto fail;
509 
510         /* Are we trying to create a map beyond EOF? */
511         if (offset + len > sb.st_size) {
512             /* If so, truncate the file map at EOF, aligned to the
513                host's real page size. Additional anonymous maps
514                will be created beyond EOF.  */
515             len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
516         }
517     }
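    /*
     * Worked example for the comment above: a 100-byte file, a 4 KiB real
     * host page size and 8 KiB target pages (so qemu_host_page_size is
     * 8 KiB).  len was rounded up to one 8 KiB target page; because
     * offset (0 here) + len exceeds sb.st_size, it is now cut back to
     * REAL_HOST_PAGE_ALIGN(100) = 4 KiB.  When qemu chooses the address
     * (no MAP_FIXED), the anonymous mapping created below spans the whole
     * 8 KiB, the file supplies the first 4 KiB, and the remaining 4 KiB
     * stay anonymous zero pages, so the guest can touch its entire 8 KiB
     * page without taking a SIGBUS.
     */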
518 
519     if (!(flags & MAP_FIXED)) {
520         unsigned long host_start;
521         void *p;
522 
523         host_len = len + offset - host_offset;
524         host_len = HOST_PAGE_ALIGN(host_len);
525 
526         /* Note: we prefer to control the mapping address. It is
527            especially important if qemu_host_page_size >
528            qemu_real_host_page_size */
529         p = mmap(g2h_untagged(start), host_len, host_prot,
530                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
531         if (p == MAP_FAILED) {
532             goto fail;
533         }
534         /* update start so that it points to the file position at 'offset' */
535         host_start = (unsigned long)p;
536         if (!(flags & MAP_ANONYMOUS)) {
537             p = mmap(g2h_untagged(start), len, host_prot,
538                      flags | MAP_FIXED, fd, host_offset);
539             if (p == MAP_FAILED) {
540                 munmap(g2h_untagged(start), host_len);
541                 goto fail;
542             }
543             host_start += offset - host_offset;
544         }
545         start = h2g(host_start);
546     } else {
547         if (start & ~TARGET_PAGE_MASK) {
548             errno = EINVAL;
549             goto fail;
550         }
551         end = start + len;
552         real_end = HOST_PAGE_ALIGN(end);
553 
554         /*
555          * Test if the requested memory area fits the target address space.
556          * It can fail only on a 64-bit host with a 32-bit target.  On any
557          * other target/host combination, the host mmap() handles this error.
558          */
559         if (end < start || !guest_range_valid_untagged(start, len)) {
560             errno = ENOMEM;
561             goto fail;
562         }
563 
564         /* worst case: we cannot map the file because start and offset are
565            misaligned within the host page, so we read the file instead */
566         if (!(flags & MAP_ANONYMOUS) &&
567             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
568             /* msync() won't work here, so we return an error if the mapping
569                is both shared and writable */
570             if ((flags & MAP_TYPE) == MAP_SHARED &&
571                 (host_prot & PROT_WRITE)) {
572                 errno = EINVAL;
573                 goto fail;
574             }
575             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
576                                   MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
577                                   -1, 0);
578             if (retaddr == -1)
579                 goto fail;
580             if (pread(fd, g2h_untagged(start), len, offset) == -1)
581                 goto fail;
582             if (!(host_prot & PROT_WRITE)) {
583                 ret = target_mprotect(start, len, target_prot);
584                 assert(ret == 0);
585             }
586             goto the_end;
587         }
588 
589         /* handle the start of the mapping */
590         if (start > real_start) {
591             if (real_end == real_start + qemu_host_page_size) {
592                 /* one single host page */
593                 ret = mmap_frag(real_start, start, end,
594                                 host_prot, flags, fd, offset);
595                 if (ret == -1)
596                     goto fail;
597                 goto the_end1;
598             }
599             ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
600                             host_prot, flags, fd, offset);
601             if (ret == -1)
602                 goto fail;
603             real_start += qemu_host_page_size;
604         }
605         /* handle the end of the mapping */
606         if (end < real_end) {
607             ret = mmap_frag(real_end - qemu_host_page_size,
608                             real_end - qemu_host_page_size, end,
609                             host_prot, flags, fd,
610                             offset + real_end - qemu_host_page_size - start);
611             if (ret == -1)
612                 goto fail;
613             real_end -= qemu_host_page_size;
614         }
615 
616         /* map the middle (easier) */
617         if (real_start < real_end) {
618             void *p;
619             unsigned long offset1;
620             if (flags & MAP_ANONYMOUS)
621                 offset1 = 0;
622             else
623                 offset1 = offset + real_start - start;
624             p = mmap(g2h_untagged(real_start), real_end - real_start,
625                      host_prot, flags, fd, offset1);
626             if (p == MAP_FAILED)
627                 goto fail;
628         }
629     }
630  the_end1:
631     if (flags & MAP_ANONYMOUS) {
632         page_flags |= PAGE_ANON;
633     }
634     page_flags |= PAGE_RESET;
635     page_set_flags(start, start + len, page_flags);
636  the_end:
637     trace_target_mmap_complete(start);
638     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
639         FILE *f = qemu_log_trylock();
640         if (f) {
641             fprintf(f, "page layout changed following mmap\n");
642             page_dump(f);
643             qemu_log_unlock(f);
644         }
645     }
646     tb_invalidate_phys_range(start, start + len);
647     mmap_unlock();
648     return start;
649 fail:
650     mmap_unlock();
651     return -1;
652 }
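/*
 * Error-reporting sketch: unlike target_mprotect() and target_munmap(),
 * which return -TARGET_E* values directly, target_mmap() reports failure
 * by returning -1 with errno set, so the syscall layer is expected to
 * translate it, roughly (illustrative only):
 *
 *     abi_long ret = get_errno(target_mmap(addr, len, prot,
 *                                          host_flags, fd, offset));
 *     (get_errno() converts the -1/errno pair into a -TARGET_E* value)
 */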
653 
654 static void mmap_reserve(abi_ulong start, abi_ulong size)
655 {
656     abi_ulong real_start;
657     abi_ulong real_end;
658     abi_ulong addr;
659     abi_ulong end;
660     int prot;
661 
662     real_start = start & qemu_host_page_mask;
663     real_end = HOST_PAGE_ALIGN(start + size);
664     end = start + size;
665     if (start > real_start) {
666         /* handle host page containing start */
667         prot = 0;
668         for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
669             prot |= page_get_flags(addr);
670         }
671         if (real_end == real_start + qemu_host_page_size) {
672             for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
673                 prot |= page_get_flags(addr);
674             }
675             end = real_end;
676         }
677         if (prot != 0)
678             real_start += qemu_host_page_size;
679     }
680     if (end < real_end) {
681         prot = 0;
682         for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
683             prot |= page_get_flags(addr);
684         }
685         if (prot != 0)
686             real_end -= qemu_host_page_size;
687     }
688     if (real_start != real_end) {
689         mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
690                  MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
691                  -1, 0);
692     }
693 }
694 
695 int target_munmap(abi_ulong start, abi_ulong len)
696 {
697     abi_ulong end, real_start, real_end, addr;
698     int prot, ret;
699 
700     trace_target_munmap(start, len);
701 
702     if (start & ~TARGET_PAGE_MASK)
703         return -TARGET_EINVAL;
704     len = TARGET_PAGE_ALIGN(len);
705     if (len == 0 || !guest_range_valid_untagged(start, len)) {
706         return -TARGET_EINVAL;
707     }
708 
709     mmap_lock();
710     end = start + len;
711     real_start = start & qemu_host_page_mask;
712     real_end = HOST_PAGE_ALIGN(end);
713 
714     if (start > real_start) {
715         /* handle host page containing start */
716         prot = 0;
717         for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
718             prot |= page_get_flags(addr);
719         }
720         if (real_end == real_start + qemu_host_page_size) {
721             for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
722                 prot |= page_get_flags(addr);
723             }
724             end = real_end;
725         }
726         if (prot != 0)
727             real_start += qemu_host_page_size;
728     }
729     if (end < real_end) {
730         prot = 0;
731         for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
732             prot |= page_get_flags(addr);
733         }
734         if (prot != 0)
735             real_end -= qemu_host_page_size;
736     }
737 
738     ret = 0;
739     /* unmap what we can */
740     if (real_start < real_end) {
741         if (reserved_va) {
742             mmap_reserve(real_start, real_end - real_start);
743         } else {
744             ret = munmap(g2h_untagged(real_start), real_end - real_start);
745         }
746     }
747 
748     if (ret == 0) {
749         page_set_flags(start, start + len, 0);
750         tb_invalidate_phys_range(start, start + len);
751     }
752     mmap_unlock();
753     return ret;
754 }
755 
756 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
757                        abi_ulong new_size, unsigned long flags,
758                        abi_ulong new_addr)
759 {
760     int prot;
761     void *host_addr;
762 
763     if (!guest_range_valid_untagged(old_addr, old_size) ||
764         ((flags & MREMAP_FIXED) &&
765          !guest_range_valid_untagged(new_addr, new_size)) ||
766         ((flags & MREMAP_MAYMOVE) == 0 &&
767          !guest_range_valid_untagged(old_addr, new_size))) {
768         errno = ENOMEM;
769         return -1;
770     }
771 
772     mmap_lock();
773 
774     if (flags & MREMAP_FIXED) {
775         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
776                            flags, g2h_untagged(new_addr));
777 
778         if (reserved_va && host_addr != MAP_FAILED) {
779             /* If new and old addresses overlap then the above mremap will
780                already have failed with EINVAL.  */
781             mmap_reserve(old_addr, old_size);
782         }
783     } else if (flags & MREMAP_MAYMOVE) {
784         abi_ulong mmap_start;
785 
786         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
787 
788         if (mmap_start == -1) {
789             errno = ENOMEM;
790             host_addr = MAP_FAILED;
791         } else {
792             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
793                                flags | MREMAP_FIXED,
794                                g2h_untagged(mmap_start));
795             if (reserved_va) {
796                 mmap_reserve(old_addr, old_size);
797             }
798         }
799     } else {
800         int prot = 0;
801         if (reserved_va && old_size < new_size) {
802             abi_ulong addr;
803             for (addr = old_addr + old_size;
804                  addr < old_addr + new_size;
805                  addr++) {
806                 prot |= page_get_flags(addr);
807             }
808         }
809         if (prot == 0) {
810             host_addr = mremap(g2h_untagged(old_addr),
811                                old_size, new_size, flags);
812 
813             if (host_addr != MAP_FAILED) {
814                 /* Check if address fits target address space */
815                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
816                     /* Revert mremap() changes */
817                     host_addr = mremap(g2h_untagged(old_addr),
818                                        new_size, old_size, flags);
819                     errno = ENOMEM;
820                     host_addr = MAP_FAILED;
821                 } else if (reserved_va && old_size > new_size) {
822                     mmap_reserve(old_addr + old_size, old_size - new_size);
823                 }
824             }
825         } else {
826             errno = ENOMEM;
827             host_addr = MAP_FAILED;
828         }
829     }
830 
831     if (host_addr == MAP_FAILED) {
832         new_addr = -1;
833     } else {
834         new_addr = h2g(host_addr);
835         prot = page_get_flags(old_addr);
836         page_set_flags(old_addr, old_addr + old_size, 0);
837         page_set_flags(new_addr, new_addr + new_size,
838                        prot | PAGE_VALID | PAGE_RESET);
839     }
840     tb_invalidate_phys_range(new_addr, new_addr + new_size);
841     mmap_unlock();
842     return new_addr;
843 }
844 
845 static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
846 {
847     ulong addr;
848 
849     if ((start | end) & ~qemu_host_page_mask) {
850         return false;
851     }
852 
853     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
854         if (!(page_get_flags(addr) & PAGE_ANON)) {
855             return false;
856         }
857     }
858 
859     return true;
860 }
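/*
 * Example of the check above (hypothetical layout): a range qualifies for
 * passthrough only if both ends are host-page aligned and every target
 * page inside it is PAGE_ANON, i.e. came from a guest MAP_ANONYMOUS
 * mapping, where the host's zero-fill semantics for MADV_DONTNEED match
 * what the guest expects.  A private file-backed guest page does not
 * qualify: qemu may have materialized it as anonymous host memory filled
 * in with pread() (see mmap_frag() and target_mmap() above), so a host
 * MADV_DONTNEED would hand back zeroes where the guest expects the file
 * contents to reappear.
 */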
861 
862 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
863 {
864     abi_ulong len, end;
865     int ret = 0;
866 
867     if (start & ~TARGET_PAGE_MASK) {
868         return -TARGET_EINVAL;
869     }
870     len = TARGET_PAGE_ALIGN(len_in);
871 
872     if (len_in && !len) {
873         return -TARGET_EINVAL;
874     }
875 
876     end = start + len;
877     if (end < start) {
878         return -TARGET_EINVAL;
879     }
880 
881     if (end == start) {
882         return 0;
883     }
884 
885     if (!guest_range_valid_untagged(start, len)) {
886         return -TARGET_EINVAL;
887     }
888 
889     /*
890      * A straight passthrough may not be safe because qemu sometimes turns
891      * private file-backed mappings into anonymous mappings.
892      *
893      * This is a hint, so ignoring and returning success is ok.
894      *
895      * This breaks MADV_DONTNEED, and implementing it completely is quite
896      * complicated. However, there is one low-hanging fruit: host-page-aligned
897      * anonymous mappings. In this case passthrough is safe, so do it.
898      */
899     mmap_lock();
900     if (advice == MADV_DONTNEED &&
901         can_passthrough_madv_dontneed(start, end)) {
902         ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
903         if (ret == 0) {
904             page_reset_target_data(start, start + len);
905         }
906     }
907     mmap_unlock();
908 
909     return ret;
910 }
911