xref: /openbmc/qemu/linux-user/mmap.c (revision 8c6631e6)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 #include "target_mman.h"
26 
/*
 * Lock protecting the guest memory map.  mmap_lock_count is the calling
 * thread's nesting depth, which lets one thread take the lock repeatedly:
 * only the outermost lock/unlock pair touches the mutex itself.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

/* Take the mmap lock; nested calls from the same thread only bump the depth. */
void mmap_lock(void)
{
    int depth = mmap_lock_count;

    /* The depth is raised before the mutex is acquired. */
    mmap_lock_count = depth + 1;
    if (depth == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/* Drop one nesting level; the mutex is released when the depth reaches 0. */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count != 0) {
        return;
    }
    pthread_mutex_unlock(&mmap_mutex);
}

/* Return true when the calling thread has a non-zero lock depth. */
bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
48 
49 /* Grab lock to make sure things are in a consistent state after fork().  */
50 void mmap_fork_start(void)
51 {
52     if (mmap_lock_count)
53         abort();
54     pthread_mutex_lock(&mmap_mutex);
55 }
56 
57 void mmap_fork_end(int child)
58 {
59     if (child)
60         pthread_mutex_init(&mmap_mutex, NULL);
61     else
62         pthread_mutex_unlock(&mmap_mutex);
63 }
64 
65 /*
66  * Validate target prot bitmask.
67  * Return the prot bitmask for the host in *HOST_PROT.
68  * Return 0 if the target prot bitmask is invalid, otherwise
69  * the internal qemu page_flags (which will include PAGE_VALID).
70  */
71 static int validate_prot_to_pageflags(int *host_prot, int prot)
72 {
73     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
74     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
75 
76     /*
77      * For the host, we need not pass anything except read/write/exec.
78      * While PROT_SEM is allowed by all hosts, it is also ignored, so
79      * don't bother transforming guest bit to host bit.  Any other
80      * target-specific prot bits will not be understood by the host
81      * and will need to be encoded into page_flags for qemu emulation.
82      *
83      * Pages that are executable by the guest will never be executed
84      * by the host, but the host will need to be able to read them.
85      */
86     *host_prot = (prot & (PROT_READ | PROT_WRITE))
87                | (prot & PROT_EXEC ? PROT_READ : 0);
88 
89 #ifdef TARGET_AARCH64
90     {
91         ARMCPU *cpu = ARM_CPU(thread_cpu);
92 
93         /*
94          * The PROT_BTI bit is only accepted if the cpu supports the feature.
95          * Since this is the unusual case, don't bother checking unless
96          * the bit has been requested.  If set and valid, record the bit
97          * within QEMU's page_flags.
98          */
99         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
100             valid |= TARGET_PROT_BTI;
101             page_flags |= PAGE_BTI;
102         }
103         /* Similarly for the PROT_MTE bit. */
104         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
105             valid |= TARGET_PROT_MTE;
106             page_flags |= PAGE_MTE;
107         }
108     }
109 #elif defined(TARGET_HPPA)
110     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
111 #endif
112 
113     return prot & ~valid ? 0 : page_flags;
114 }
115 
116 /* NOTE: all the constants are the HOST ones, but addresses are target. */
117 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
118 {
119     abi_ulong end, host_start, host_end, addr;
120     int prot1, ret, page_flags, host_prot;
121 
122     trace_target_mprotect(start, len, target_prot);
123 
124     if ((start & ~TARGET_PAGE_MASK) != 0) {
125         return -TARGET_EINVAL;
126     }
127     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
128     if (!page_flags) {
129         return -TARGET_EINVAL;
130     }
131     len = TARGET_PAGE_ALIGN(len);
132     end = start + len;
133     if (!guest_range_valid_untagged(start, len)) {
134         return -TARGET_ENOMEM;
135     }
136     if (len == 0) {
137         return 0;
138     }
139 
140     mmap_lock();
141     host_start = start & qemu_host_page_mask;
142     host_end = HOST_PAGE_ALIGN(end);
143     if (start > host_start) {
144         /* handle host page containing start */
145         prot1 = host_prot;
146         for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
147             prot1 |= page_get_flags(addr);
148         }
149         if (host_end == host_start + qemu_host_page_size) {
150             for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
151                 prot1 |= page_get_flags(addr);
152             }
153             end = host_end;
154         }
155         ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
156                        prot1 & PAGE_BITS);
157         if (ret != 0) {
158             goto error;
159         }
160         host_start += qemu_host_page_size;
161     }
162     if (end < host_end) {
163         prot1 = host_prot;
164         for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
165             prot1 |= page_get_flags(addr);
166         }
167         ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
168                        qemu_host_page_size, prot1 & PAGE_BITS);
169         if (ret != 0) {
170             goto error;
171         }
172         host_end -= qemu_host_page_size;
173     }
174 
175     /* handle the pages in the middle */
176     if (host_start < host_end) {
177         ret = mprotect(g2h_untagged(host_start),
178                        host_end - host_start, host_prot);
179         if (ret != 0) {
180             goto error;
181         }
182     }
183 
184     page_set_flags(start, start + len, page_flags);
185     ret = 0;
186 
187 error:
188     mmap_unlock();
189     return ret;
190 }
191 
192 /* map an incomplete host page */
193 static int mmap_frag(abi_ulong real_start,
194                      abi_ulong start, abi_ulong end,
195                      int prot, int flags, int fd, abi_ulong offset)
196 {
197     abi_ulong real_end, addr;
198     void *host_start;
199     int prot1, prot_new;
200 
201     real_end = real_start + qemu_host_page_size;
202     host_start = g2h_untagged(real_start);
203 
204     /* get the protection of the target pages outside the mapping */
205     prot1 = 0;
206     for(addr = real_start; addr < real_end; addr++) {
207         if (addr < start || addr >= end)
208             prot1 |= page_get_flags(addr);
209     }
210 
211     if (prot1 == 0) {
212         /* no page was there, so we allocate one */
213         void *p = mmap(host_start, qemu_host_page_size, prot,
214                        flags | MAP_ANONYMOUS, -1, 0);
215         if (p == MAP_FAILED)
216             return -1;
217         prot1 = prot;
218     }
219     prot1 &= PAGE_BITS;
220 
221     prot_new = prot | prot1;
222     if (!(flags & MAP_ANONYMOUS)) {
223         /* msync() won't work here, so we return an error if write is
224            possible while it is a shared mapping */
225         if ((flags & MAP_TYPE) == MAP_SHARED &&
226             (prot & PROT_WRITE))
227             return -1;
228 
229         /* adjust protection to be able to read */
230         if (!(prot1 & PROT_WRITE))
231             mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
232 
233         /* read the corresponding file data */
234         if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
235             return -1;
236 
237         /* put final protection */
238         if (prot_new != (prot1 | PROT_WRITE))
239             mprotect(host_start, qemu_host_page_size, prot_new);
240     } else {
241         if (prot_new != prot1) {
242             mprotect(host_start, qemu_host_page_size, prot_new);
243         }
244         if (prot_new & PROT_WRITE) {
245             memset(g2h_untagged(start), 0, end - start);
246         }
247     }
248     return 0;
249 }
250 
251 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
252 #ifdef TARGET_AARCH64
253 # define TASK_UNMAPPED_BASE  0x5500000000
254 #else
255 # define TASK_UNMAPPED_BASE  (1ul << 38)
256 #endif
257 #else
258 #ifdef TARGET_HPPA
259 # define TASK_UNMAPPED_BASE  0xfa000000
260 #else
261 # define TASK_UNMAPPED_BASE  0x40000000
262 #endif
263 #endif
264 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
265 
266 unsigned long last_brk;
267 
268 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
269    of guest address space.  */
270 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
271                                         abi_ulong align)
272 {
273     abi_ulong addr, end_addr, incr = qemu_host_page_size;
274     int prot;
275     bool looped = false;
276 
277     if (size > reserved_va) {
278         return (abi_ulong)-1;
279     }
280 
281     /* Note that start and size have already been aligned by mmap_find_vma. */
282 
283     end_addr = start + size;
284     if (start > reserved_va - size) {
285         /* Start at the top of the address space.  */
286         end_addr = ((reserved_va - size) & -align) + size;
287         looped = true;
288     }
289 
290     /* Search downward from END_ADDR, checking to see if a page is in use.  */
291     addr = end_addr;
292     while (1) {
293         addr -= incr;
294         if (addr > end_addr) {
295             if (looped) {
296                 /* Failure.  The entire address space has been searched.  */
297                 return (abi_ulong)-1;
298             }
299             /* Re-start at the top of the address space.  */
300             addr = end_addr = ((reserved_va - size) & -align) + size;
301             looped = true;
302         } else {
303             prot = page_get_flags(addr);
304             if (prot) {
305                 /* Page in use.  Restart below this page.  */
306                 addr = end_addr = ((addr - size) & -align) + size;
307             } else if (addr && addr + size == end_addr) {
308                 /* Success!  All pages between ADDR and END_ADDR are free.  */
309                 if (start == mmap_next_start) {
310                     mmap_next_start = addr;
311                 }
312                 return addr;
313             }
314         }
315     }
316 }
317 
318 /*
319  * Find and reserve a free memory area of size 'size'. The search
320  * starts at 'start'.
321  * It must be called with mmap_lock() held.
322  * Return -1 if error.
323  */
324 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
325 {
326     void *ptr, *prev;
327     abi_ulong addr;
328     int wrapped, repeat;
329 
330     align = MAX(align, qemu_host_page_size);
331 
332     /* If 'start' == 0, then a default start address is used. */
333     if (start == 0) {
334         start = mmap_next_start;
335     } else {
336         start &= qemu_host_page_mask;
337     }
338     start = ROUND_UP(start, align);
339 
340     size = HOST_PAGE_ALIGN(size);
341 
342     if (reserved_va) {
343         return mmap_find_vma_reserved(start, size, align);
344     }
345 
346     addr = start;
347     wrapped = repeat = 0;
348     prev = 0;
349 
350     for (;; prev = ptr) {
351         /*
352          * Reserve needed memory area to avoid a race.
353          * It should be discarded using:
354          *  - mmap() with MAP_FIXED flag
355          *  - mremap() with MREMAP_FIXED flag
356          *  - shmat() with SHM_REMAP flag
357          */
358         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
359                    MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
360 
361         /* ENOMEM, if host address space has no memory */
362         if (ptr == MAP_FAILED) {
363             return (abi_ulong)-1;
364         }
365 
366         /* Count the number of sequential returns of the same address.
367            This is used to modify the search algorithm below.  */
368         repeat = (ptr == prev ? repeat + 1 : 0);
369 
370         if (h2g_valid(ptr + size - 1)) {
371             addr = h2g(ptr);
372 
373             if ((addr & (align - 1)) == 0) {
374                 /* Success.  */
375                 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
376                     mmap_next_start = addr + size;
377                 }
378                 return addr;
379             }
380 
381             /* The address is not properly aligned for the target.  */
382             switch (repeat) {
383             case 0:
384                 /* Assume the result that the kernel gave us is the
385                    first with enough free space, so start again at the
386                    next higher target page.  */
387                 addr = ROUND_UP(addr, align);
388                 break;
389             case 1:
390                 /* Sometimes the kernel decides to perform the allocation
391                    at the top end of memory instead.  */
392                 addr &= -align;
393                 break;
394             case 2:
395                 /* Start over at low memory.  */
396                 addr = 0;
397                 break;
398             default:
399                 /* Fail.  This unaligned block must the last.  */
400                 addr = -1;
401                 break;
402             }
403         } else {
404             /* Since the result the kernel gave didn't fit, start
405                again at low memory.  If any repetition, fail.  */
406             addr = (repeat ? -1 : 0);
407         }
408 
409         /* Unmap and try again.  */
410         munmap(ptr, size);
411 
412         /* ENOMEM if we checked the whole of the target address space.  */
413         if (addr == (abi_ulong)-1) {
414             return (abi_ulong)-1;
415         } else if (addr == 0) {
416             if (wrapped) {
417                 return (abi_ulong)-1;
418             }
419             wrapped = 1;
420             /* Don't actually use 0 when wrapping, instead indicate
421                that we'd truly like an allocation in low memory.  */
422             addr = (mmap_min_addr > TARGET_PAGE_SIZE
423                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
424                      : TARGET_PAGE_SIZE);
425         } else if (wrapped && addr >= start) {
426             return (abi_ulong)-1;
427         }
428     }
429 }
430 
431 /* NOTE: all the constants are the HOST ones */
432 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
433                      int flags, int fd, abi_ulong offset)
434 {
435     abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
436               passthrough_start = -1, passthrough_end = -1;
437     int page_flags, host_prot;
438 
439     mmap_lock();
440     trace_target_mmap(start, len, target_prot, flags, fd, offset);
441 
442     if (!len) {
443         errno = EINVAL;
444         goto fail;
445     }
446 
447     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
448     if (!page_flags) {
449         errno = EINVAL;
450         goto fail;
451     }
452 
453     /* Also check for overflows... */
454     len = TARGET_PAGE_ALIGN(len);
455     if (!len) {
456         errno = ENOMEM;
457         goto fail;
458     }
459 
460     if (offset & ~TARGET_PAGE_MASK) {
461         errno = EINVAL;
462         goto fail;
463     }
464 
465     /*
466      * If we're mapping shared memory, ensure we generate code for parallel
467      * execution and flush old translations.  This will work up to the level
468      * supported by the host -- anything that requires EXCP_ATOMIC will not
469      * be atomic with respect to an external process.
470      */
471     if (flags & MAP_SHARED) {
472         CPUState *cpu = thread_cpu;
473         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
474             cpu->tcg_cflags |= CF_PARALLEL;
475             tb_flush(cpu);
476         }
477     }
478 
479     real_start = start & qemu_host_page_mask;
480     host_offset = offset & qemu_host_page_mask;
481 
482     /* If the user is asking for the kernel to find a location, do that
483        before we truncate the length for mapping files below.  */
484     if (!(flags & MAP_FIXED)) {
485         host_len = len + offset - host_offset;
486         host_len = HOST_PAGE_ALIGN(host_len);
487         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
488         if (start == (abi_ulong)-1) {
489             errno = ENOMEM;
490             goto fail;
491         }
492     }
493 
494     /* When mapping files into a memory area larger than the file, accesses
495        to pages beyond the file size will cause a SIGBUS.
496 
497        For example, if mmaping a file of 100 bytes on a host with 4K pages
498        emulating a target with 8K pages, the target expects to be able to
499        access the first 8K. But the host will trap us on any access beyond
500        4K.
501 
502        When emulating a target with a larger page-size than the hosts, we
503        may need to truncate file maps at EOF and add extra anonymous pages
504        up to the targets page boundary.  */
505 
506     if ((qemu_real_host_page_size() < qemu_host_page_size) &&
507         !(flags & MAP_ANONYMOUS)) {
508         struct stat sb;
509 
510        if (fstat (fd, &sb) == -1)
511            goto fail;
512 
513        /* Are we trying to create a map beyond EOF?.  */
514        if (offset + len > sb.st_size) {
515            /* If so, truncate the file map at eof aligned with
516               the hosts real pagesize. Additional anonymous maps
517               will be created beyond EOF.  */
518            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
519        }
520     }
521 
522     if (!(flags & MAP_FIXED)) {
523         unsigned long host_start;
524         void *p;
525 
526         host_len = len + offset - host_offset;
527         host_len = HOST_PAGE_ALIGN(host_len);
528 
529         /* Note: we prefer to control the mapping address. It is
530            especially important if qemu_host_page_size >
531            qemu_real_host_page_size */
532         p = mmap(g2h_untagged(start), host_len, host_prot,
533                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
534         if (p == MAP_FAILED) {
535             goto fail;
536         }
537         /* update start so that it points to the file position at 'offset' */
538         host_start = (unsigned long)p;
539         if (!(flags & MAP_ANONYMOUS)) {
540             p = mmap(g2h_untagged(start), len, host_prot,
541                      flags | MAP_FIXED, fd, host_offset);
542             if (p == MAP_FAILED) {
543                 munmap(g2h_untagged(start), host_len);
544                 goto fail;
545             }
546             host_start += offset - host_offset;
547         }
548         start = h2g(host_start);
549         passthrough_start = start;
550         passthrough_end = start + len;
551     } else {
552         if (start & ~TARGET_PAGE_MASK) {
553             errno = EINVAL;
554             goto fail;
555         }
556         end = start + len;
557         real_end = HOST_PAGE_ALIGN(end);
558 
559         /*
560          * Test if requested memory area fits target address space
561          * It can fail only on 64-bit host with 32-bit target.
562          * On any other target/host host mmap() handles this error correctly.
563          */
564         if (end < start || !guest_range_valid_untagged(start, len)) {
565             errno = ENOMEM;
566             goto fail;
567         }
568 
569         /* worst case: we cannot map the file because the offset is not
570            aligned, so we read it */
571         if (!(flags & MAP_ANONYMOUS) &&
572             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
573             /* msync() won't work here, so we return an error if write is
574                possible while it is a shared mapping */
575             if ((flags & MAP_TYPE) == MAP_SHARED &&
576                 (host_prot & PROT_WRITE)) {
577                 errno = EINVAL;
578                 goto fail;
579             }
580             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
581                                   MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
582                                   -1, 0);
583             if (retaddr == -1)
584                 goto fail;
585             if (pread(fd, g2h_untagged(start), len, offset) == -1)
586                 goto fail;
587             if (!(host_prot & PROT_WRITE)) {
588                 ret = target_mprotect(start, len, target_prot);
589                 assert(ret == 0);
590             }
591             goto the_end;
592         }
593 
594         /* handle the start of the mapping */
595         if (start > real_start) {
596             if (real_end == real_start + qemu_host_page_size) {
597                 /* one single host page */
598                 ret = mmap_frag(real_start, start, end,
599                                 host_prot, flags, fd, offset);
600                 if (ret == -1)
601                     goto fail;
602                 goto the_end1;
603             }
604             ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
605                             host_prot, flags, fd, offset);
606             if (ret == -1)
607                 goto fail;
608             real_start += qemu_host_page_size;
609         }
610         /* handle the end of the mapping */
611         if (end < real_end) {
612             ret = mmap_frag(real_end - qemu_host_page_size,
613                             real_end - qemu_host_page_size, end,
614                             host_prot, flags, fd,
615                             offset + real_end - qemu_host_page_size - start);
616             if (ret == -1)
617                 goto fail;
618             real_end -= qemu_host_page_size;
619         }
620 
621         /* map the middle (easier) */
622         if (real_start < real_end) {
623             void *p;
624             unsigned long offset1;
625             if (flags & MAP_ANONYMOUS)
626                 offset1 = 0;
627             else
628                 offset1 = offset + real_start - start;
629             p = mmap(g2h_untagged(real_start), real_end - real_start,
630                      host_prot, flags, fd, offset1);
631             if (p == MAP_FAILED)
632                 goto fail;
633             passthrough_start = real_start;
634             passthrough_end = real_end;
635         }
636     }
637  the_end1:
638     if (flags & MAP_ANONYMOUS) {
639         page_flags |= PAGE_ANON;
640     }
641     page_flags |= PAGE_RESET;
642     if (passthrough_start == passthrough_end) {
643         page_set_flags(start, start + len, page_flags);
644     } else {
645         if (start < passthrough_start) {
646             page_set_flags(start, passthrough_start, page_flags);
647         }
648         page_set_flags(passthrough_start, passthrough_end,
649                        page_flags | PAGE_PASSTHROUGH);
650         if (passthrough_end < start + len) {
651             page_set_flags(passthrough_end, start + len, page_flags);
652         }
653     }
654  the_end:
655     trace_target_mmap_complete(start);
656     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
657         FILE *f = qemu_log_trylock();
658         if (f) {
659             fprintf(f, "page layout changed following mmap\n");
660             page_dump(f);
661             qemu_log_unlock(f);
662         }
663     }
664     mmap_unlock();
665     return start;
666 fail:
667     mmap_unlock();
668     return -1;
669 }
670 
671 static void mmap_reserve(abi_ulong start, abi_ulong size)
672 {
673     abi_ulong real_start;
674     abi_ulong real_end;
675     abi_ulong addr;
676     abi_ulong end;
677     int prot;
678 
679     real_start = start & qemu_host_page_mask;
680     real_end = HOST_PAGE_ALIGN(start + size);
681     end = start + size;
682     if (start > real_start) {
683         /* handle host page containing start */
684         prot = 0;
685         for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
686             prot |= page_get_flags(addr);
687         }
688         if (real_end == real_start + qemu_host_page_size) {
689             for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
690                 prot |= page_get_flags(addr);
691             }
692             end = real_end;
693         }
694         if (prot != 0)
695             real_start += qemu_host_page_size;
696     }
697     if (end < real_end) {
698         prot = 0;
699         for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
700             prot |= page_get_flags(addr);
701         }
702         if (prot != 0)
703             real_end -= qemu_host_page_size;
704     }
705     if (real_start != real_end) {
706         mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
707                  MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
708                  -1, 0);
709     }
710 }
711 
712 int target_munmap(abi_ulong start, abi_ulong len)
713 {
714     abi_ulong end, real_start, real_end, addr;
715     int prot, ret;
716 
717     trace_target_munmap(start, len);
718 
719     if (start & ~TARGET_PAGE_MASK)
720         return -TARGET_EINVAL;
721     len = TARGET_PAGE_ALIGN(len);
722     if (len == 0 || !guest_range_valid_untagged(start, len)) {
723         return -TARGET_EINVAL;
724     }
725 
726     mmap_lock();
727     end = start + len;
728     real_start = start & qemu_host_page_mask;
729     real_end = HOST_PAGE_ALIGN(end);
730 
731     if (start > real_start) {
732         /* handle host page containing start */
733         prot = 0;
734         for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
735             prot |= page_get_flags(addr);
736         }
737         if (real_end == real_start + qemu_host_page_size) {
738             for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
739                 prot |= page_get_flags(addr);
740             }
741             end = real_end;
742         }
743         if (prot != 0)
744             real_start += qemu_host_page_size;
745     }
746     if (end < real_end) {
747         prot = 0;
748         for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
749             prot |= page_get_flags(addr);
750         }
751         if (prot != 0)
752             real_end -= qemu_host_page_size;
753     }
754 
755     ret = 0;
756     /* unmap what we can */
757     if (real_start < real_end) {
758         if (reserved_va) {
759             mmap_reserve(real_start, real_end - real_start);
760         } else {
761             ret = munmap(g2h_untagged(real_start), real_end - real_start);
762         }
763     }
764 
765     if (ret == 0) {
766         page_set_flags(start, start + len, 0);
767     }
768     mmap_unlock();
769     return ret;
770 }
771 
772 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
773                        abi_ulong new_size, unsigned long flags,
774                        abi_ulong new_addr)
775 {
776     int prot;
777     void *host_addr;
778 
779     if (!guest_range_valid_untagged(old_addr, old_size) ||
780         ((flags & MREMAP_FIXED) &&
781          !guest_range_valid_untagged(new_addr, new_size)) ||
782         ((flags & MREMAP_MAYMOVE) == 0 &&
783          !guest_range_valid_untagged(old_addr, new_size))) {
784         errno = ENOMEM;
785         return -1;
786     }
787 
788     mmap_lock();
789 
790     if (flags & MREMAP_FIXED) {
791         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
792                            flags, g2h_untagged(new_addr));
793 
794         if (reserved_va && host_addr != MAP_FAILED) {
795             /* If new and old addresses overlap then the above mremap will
796                already have failed with EINVAL.  */
797             mmap_reserve(old_addr, old_size);
798         }
799     } else if (flags & MREMAP_MAYMOVE) {
800         abi_ulong mmap_start;
801 
802         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
803 
804         if (mmap_start == -1) {
805             errno = ENOMEM;
806             host_addr = MAP_FAILED;
807         } else {
808             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
809                                flags | MREMAP_FIXED,
810                                g2h_untagged(mmap_start));
811             if (reserved_va) {
812                 mmap_reserve(old_addr, old_size);
813             }
814         }
815     } else {
816         int prot = 0;
817         if (reserved_va && old_size < new_size) {
818             abi_ulong addr;
819             for (addr = old_addr + old_size;
820                  addr < old_addr + new_size;
821                  addr++) {
822                 prot |= page_get_flags(addr);
823             }
824         }
825         if (prot == 0) {
826             host_addr = mremap(g2h_untagged(old_addr),
827                                old_size, new_size, flags);
828 
829             if (host_addr != MAP_FAILED) {
830                 /* Check if address fits target address space */
831                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
832                     /* Revert mremap() changes */
833                     host_addr = mremap(g2h_untagged(old_addr),
834                                        new_size, old_size, flags);
835                     errno = ENOMEM;
836                     host_addr = MAP_FAILED;
837                 } else if (reserved_va && old_size > new_size) {
838                     mmap_reserve(old_addr + old_size, old_size - new_size);
839                 }
840             }
841         } else {
842             errno = ENOMEM;
843             host_addr = MAP_FAILED;
844         }
845     }
846 
847     if (host_addr == MAP_FAILED) {
848         new_addr = -1;
849     } else {
850         new_addr = h2g(host_addr);
851         prot = page_get_flags(old_addr);
852         page_set_flags(old_addr, old_addr + old_size, 0);
853         page_set_flags(new_addr, new_addr + new_size,
854                        prot | PAGE_VALID | PAGE_RESET);
855     }
856     mmap_unlock();
857     return new_addr;
858 }
859 
860 static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
861 {
862     ulong addr;
863 
864     if ((start | end) & ~qemu_host_page_mask) {
865         return false;
866     }
867 
868     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
869         if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
870             return false;
871         }
872     }
873 
874     return true;
875 }
876 
877 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
878 {
879     abi_ulong len, end;
880     int ret = 0;
881 
882     if (start & ~TARGET_PAGE_MASK) {
883         return -TARGET_EINVAL;
884     }
885     len = TARGET_PAGE_ALIGN(len_in);
886 
887     if (len_in && !len) {
888         return -TARGET_EINVAL;
889     }
890 
891     end = start + len;
892     if (end < start) {
893         return -TARGET_EINVAL;
894     }
895 
896     if (end == start) {
897         return 0;
898     }
899 
900     if (!guest_range_valid_untagged(start, len)) {
901         return -TARGET_EINVAL;
902     }
903 
904     /* Translate for some architectures which have different MADV_xxx values */
905     switch (advice) {
906     case TARGET_MADV_DONTNEED:      /* alpha */
907         advice = MADV_DONTNEED;
908         break;
909     case TARGET_MADV_WIPEONFORK:    /* parisc */
910         advice = MADV_WIPEONFORK;
911         break;
912     case TARGET_MADV_KEEPONFORK:    /* parisc */
913         advice = MADV_KEEPONFORK;
914         break;
915     /* we do not care about the other MADV_xxx values yet */
916     }
917 
918     /*
919      * Most advice values are hints, so ignoring and returning success is ok.
920      *
921      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
922      * MADV_KEEPONFORK are not hints and need to be emulated.
923      *
924      * A straight passthrough for those may not be safe because qemu sometimes
925      * turns private file-backed mappings into anonymous mappings.
926      * can_passthrough_madvise() helps to check if a passthrough is possible by
927      * comparing mappings that are known to have the same semantics in the host
928      * and the guest. In this case passthrough is safe.
929      *
930      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
931      * return failure if not.
932      *
933      * MADV_DONTNEED is passed through as well, if possible.
934      * If passthrough isn't possible, we nevertheless (wrongly!) return
935      * success, which is broken but some userspace programs fail to work
936      * otherwise. Completely implementing such emulation is quite complicated
937      * though.
938      */
939     mmap_lock();
940     switch (advice) {
941     case MADV_WIPEONFORK:
942     case MADV_KEEPONFORK:
943         ret = -EINVAL;
944         /* fall through */
945     case MADV_DONTNEED:
946         if (can_passthrough_madvise(start, end)) {
947             ret = get_errno(madvise(g2h_untagged(start), len, advice));
948             if ((advice == MADV_DONTNEED) && (ret == 0)) {
949                 page_reset_target_data(start, start + len);
950             }
951         }
952     }
953     mmap_unlock();
954 
955     return ret;
956 }
957