/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

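/*
 * mmap_lock() and mmap_unlock() nest: the process-wide mutex is taken only
 * when a thread's mmap_lock_count goes from 0 to 1 and released when it
 * drops back to 0, so each thread may take the lock recursively.
 */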
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

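/*
 * fork() is bracketed by mmap_fork_start()/mmap_fork_end(): the parent
 * simply releases the mutex it took before forking, while the child
 * reinitializes it so the lock starts out in a clean, unlocked state.
 */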
void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#endif

    return prot & ~valid ? 0 : page_flags;
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
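    /* handle the host page containing the end, if only partially covered */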
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
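/*
 * The host page at REAL_START is shared between the requested guest range
 * [start, end) and whatever guest pages already occupy the rest of it, so
 * the protections of all of those pages are merged.  For file mappings the
 * data is read into place with pread() rather than mapped directly, since
 * only part of the host page belongs to this mapping.
 */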
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

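/*
 * TASK_UNMAPPED_BASE is the default guest address at which mmap_find_vma()
 * begins its search when the guest does not request a specific address.
 */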
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflow: page-aligning len can wrap it to zero. */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size. Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
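        /*
         * Reserve the whole host range anonymously at the address chosen by
         * mmap_find_vma() first; the file, if any, is then mapped over the
         * beginning of that reservation below, so the placement stays under
         * our control rather than the host kernel's.
         */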
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h_untagged(start), len, offset) == -1)
                goto fail;
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    page_set_flags(start, start + len, page_flags);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

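/*
 * With reserved_va, the guest address space is pre-reserved on the host, so
 * instead of returning an unmapped range to the host we replace it with a
 * PROT_NONE, MAP_NORESERVE mapping to keep the reservation intact.
 */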
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
                 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                 -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}

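/*
 * Three cases below: MREMAP_FIXED moves the mapping to a caller-supplied
 * address, MREMAP_MAYMOVE relocates it to an area found by mmap_find_vma(),
 * and otherwise the mapping is resized in place.  Under reserved_va, any
 * range vacated on the host is re-reserved via mmap_reserve().
 */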
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
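        /*
         * Neither MREMAP_FIXED nor MREMAP_MAYMOVE: resize in place.  When
         * growing under reserved_va, first check that the guest pages just
         * past the old end are unallocated (prot stays 0) before letting
         * the host mremap() extend into them.
         */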
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail freed by shrinking the mapping. */
                    mmap_reserve(old_addr + new_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}

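/*
 * Passing MADV_DONTNEED through to the host is only safe when the range is
 * host-page aligned and every target page in it is anonymous; see the
 * comment in target_madvise() below.
 */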
static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_ANON)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, and implementing it completely is quite
     * complicated. However, there is one low-hanging fruit: host-page-aligned
     * anonymous mappings. In that case passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}
907