/* xref: /openbmc/qemu/linux-user/mmap.c (revision 3c4a8a8f) */
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

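/*
 * Example (illustrative): the lock is recursive per thread via the
 * thread-local mmap_lock_count, so nested acquisition is cheap and
 * only the outermost pair touches the pthread mutex:
 *
 *     mmap_lock();
 *     mmap_lock();            // nested: count 1 -> 2, mutex untouched
 *     assert(have_mmap_lock());
 *     mmap_unlock();          // count 2 -> 1, mutex still held
 *     mmap_unlock();          // count 1 -> 0, mutex released
 */
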
/* Grab the lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

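/*
 * Usage sketch (illustrative; the actual call sites live in the
 * linux-user fork path, not in this file): bracketing fork() this way
 * ensures the child never inherits a mutex held by another thread:
 *
 *     mmap_fork_start();
 *     pid_t pid = fork();
 *     mmap_fork_end(pid == 0);    // child reinitializes, parent unlocks
 */
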
/*
 * Validate the target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

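/*
 * Worked example (illustrative): PROT_READ | PROT_WRITE yields
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, because the low PROT_* and
 * PAGE_* bit values coincide.  A bit outside 'valid', e.g. 0x80, makes
 * "prot & ~valid" nonzero, so the function returns 0 and callers
 * report EINVAL:
 *
 *     validate_prot_to_pageflags(PROT_READ | PROT_WRITE)  != 0
 *     validate_prot_to_pageflags(PROT_READ | 0x80)        == 0
 */
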
/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming the guest bit to the host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

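/*
 * Example (illustrative): a guest PROT_EXEC-only page becomes
 * PROT_READ on the host, because guest code is read and translated by
 * QEMU rather than executed directly:
 *
 *     target_to_host_prot(PROT_EXEC)              == PROT_READ
 *     target_to_host_prot(PROT_READ | PROT_WRITE) == PROT_READ | PROT_WRITE
 */
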
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

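/*
 * Worked example (illustrative): with 4K guest pages on a 64K-page
 * host, mprotect'ing guest range [0x11000, 0x38fff] touches host pages
 * [0x10000, 0x3ffff].  The first and last host pages also hold guest
 * pages outside the range, so their host protection must be the union
 * of the old and new flags; only the middle host page gets exactly the
 * requested protection.  Hence the three starts[]/lens[]/prots[] slots
 * above.
 */
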
/* Map an incomplete host page. */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put the final protection in place. */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

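/*
 * Example (illustrative): mapping guest pages [0x11000, 0x11fff] with
 * 4K guest pages on a 64K-page host lands inside host page
 * [0x10000, 0x1ffff].  mmap_frag() keeps any guest pages already live
 * in that host page: it widens the host protection if needed, pread()s
 * or zeroes only the new fragment, then restores the union protection.
 */
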
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = NULL;

    for (;; prev = ptr) {
        /*
         * Reserve the needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM if the host address space has no memory. */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

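/*
 * Reserve-then-replace pattern (illustrative sketch): the PROT_NONE
 * mapping returned above is a placeholder; the caller overwrites it
 * atomically with MAP_FIXED rather than unmapping it first, e.g.:
 *
 *     abi_ulong addr = mmap_find_vma(0, size, TARGET_PAGE_SIZE);
 *     if (addr != (abi_ulong)-1) {
 *         void *p = mmap(g2h_untagged(addr), size, host_prot,
 *                        flags | MAP_FIXED, fd, offset);
 *     }
 *
 * Unmapping first would reopen the race the reservation closes.
 */
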
/* NOTE: all the constants are the HOST ones. */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned to the
             * host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* Update start so that it points to the file position at 'offset'. */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* Handle the start of the mapping. */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* One single host page. */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* Handle the end of the mapping. */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* Map the middle (easier). */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

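/*
 * Worked example (illustrative): a MAP_FIXED file mapping of guest
 * pages [0x11000, 0x38fff] with 4K guest pages on a 64K-page host and
 * a host-page-aligned offset is assembled in three steps: mmap_frag()
 * for the head fragment inside host page 0x10000, mmap_frag() for the
 * tail fragment inside host page 0x30000, and one direct host mmap()
 * for the aligned middle [0x20000, 0x2ffff], which is the only part
 * that gets PAGE_PASSTHROUGH.
 */
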
static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

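/*
 * Example (illustrative): freeing guest pages [0x11000, 0x11fff] with
 * 4K guest pages on a 64K-page host releases nothing at the host level
 * while other guest pages still live in host page [0x10000, 0x1ffff];
 * the loops above spot those neighbors via page_get_flags() and return
 * early, leaving the host page mapped.
 */
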
int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if the address fits the target address space. */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes. */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the freed tail of the shrunk mapping. */
                    mmap_reserve_or_unmap(old_addr + new_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

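/*
 * Example (illustrative): a guest mremap() that must move proceeds in
 * two steps so guest bookkeeping stays consistent: mmap_find_vma()
 * picks a free guest range, then the host mremap() with MREMAP_FIXED
 * relocates the pages into it:
 *
 *     new_addr = target_mremap(old, 0x2000, 0x4000, MREMAP_MAYMOVE, 0);
 *
 * Afterwards the old range's page flags are cleared and the new range
 * inherits them with PAGE_VALID | PAGE_RESET.
 */
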
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values. */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* We do not care about the other MADV_xxx values yet. */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
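
/*
 * Decision table (illustrative summary of the switch above):
 *
 *     advice                               passthrough?  result
 *     -----------------------------------  ------------  ---------------------------
 *     MADV_WIPEONFORK / MADV_KEEPONFORK    yes           host madvise(), its errno
 *     MADV_WIPEONFORK / MADV_KEEPONFORK    no            -EINVAL (cannot be emulated)
 *     MADV_DONTNEED                        yes           host madvise() + data reset
 *     MADV_DONTNEED                        no            0 (knowingly wrong, see above)
 *     anything else                        -             0 (hints may be ignored)
 */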
995