1 /*
2 * mmap support for qemu
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include <sys/shm.h>
21 #include "trace.h"
22 #include "exec/log.h"
23 #include "exec/page-protection.h"
24 #include "exec/mmap-lock.h"
25 #include "qemu.h"
26 #include "user/page-protection.h"
27 #include "user-internals.h"
28 #include "user-mmap.h"
29 #include "target_mman.h"
30 #include "qemu/interval-tree.h"
31
32 #ifdef TARGET_ARM
33 #include "target/arm/cpu-features.h"
34 #endif
35
36 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
37 static __thread int mmap_lock_count;
38
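/*
 * mmap_lock() may be taken recursively within a thread: the global
 * mutex is acquired only when the thread-local count goes from 0 to 1
 * and released when it drops back to 0.
 */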
void mmap_lock(void)
40 {
41 if (mmap_lock_count++ == 0) {
42 pthread_mutex_lock(&mmap_mutex);
43 }
44 }
45
void mmap_unlock(void)
47 {
48 assert(mmap_lock_count > 0);
49 if (--mmap_lock_count == 0) {
50 pthread_mutex_unlock(&mmap_mutex);
51 }
52 }
53
bool have_mmap_lock(void)
55 {
    return mmap_lock_count > 0;
57 }
58
59 /* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
61 {
    if (mmap_lock_count) {
        abort();
    }
64 pthread_mutex_lock(&mmap_mutex);
65 }
66
void mmap_fork_end(int child)
68 {
69 if (child) {
70 pthread_mutex_init(&mmap_mutex, NULL);
71 } else {
72 pthread_mutex_unlock(&mmap_mutex);
73 }
74 }
75
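/*
 * Interval tree of active SysV shared memory attachments.  Each node
 * records the [start, last] bounds of one attachment so that
 * target_shmdt() can recover the size of the region to detach.
 */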
76 /* Protected by mmap_lock. */
77 static IntervalTreeRoot shm_regions;
78
static void shm_region_add(abi_ptr start, abi_ptr last)
80 {
81 IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
82
83 i->start = start;
84 i->last = last;
85 interval_tree_insert(i, &shm_regions);
86 }
87
static abi_ptr shm_region_find(abi_ptr start)
89 {
90 IntervalTreeNode *i;
91
92 for (i = interval_tree_iter_first(&shm_regions, start, start); i;
93 i = interval_tree_iter_next(i, start, start)) {
94 if (i->start == start) {
95 return i->last;
96 }
97 }
98 return 0;
99 }
100
static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
102 {
103 IntervalTreeNode *i, *n;
104
105 for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
106 n = interval_tree_iter_next(i, start, last);
107 if (i->start >= start && i->last <= last) {
108 interval_tree_remove(i, &shm_regions);
109 g_free(i);
110 }
111 }
112 }
113
114 /*
115 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 * The host prot bitmask is derived separately by target_to_host_prot().
119 */
validate_prot_to_pageflags(int prot)120 static int validate_prot_to_pageflags(int prot)
121 {
122 int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
123 int page_flags = (prot & PAGE_RWX) | PAGE_VALID;
124
125 #ifdef TARGET_AARCH64
126 {
127 ARMCPU *cpu = ARM_CPU(thread_cpu);
128
129 /*
130 * The PROT_BTI bit is only accepted if the cpu supports the feature.
131 * Since this is the unusual case, don't bother checking unless
132 * the bit has been requested. If set and valid, record the bit
133 * within QEMU's page_flags.
134 */
135 if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
136 valid |= TARGET_PROT_BTI;
137 page_flags |= PAGE_BTI;
138 }
139 /* Similarly for the PROT_MTE bit. */
140 if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
141 valid |= TARGET_PROT_MTE;
142 page_flags |= PAGE_MTE;
143 }
144 }
145 #elif defined(TARGET_HPPA)
146 valid |= PROT_GROWSDOWN | PROT_GROWSUP;
147 #endif
148
149 return prot & ~valid ? 0 : page_flags;
150 }
151
152 /*
153 * For the host, we need not pass anything except read/write/exec.
154 * While PROT_SEM is allowed by all hosts, it is also ignored, so
155 * don't bother transforming guest bit to host bit. Any other
156 * target-specific prot bits will not be understood by the host
157 * and will need to be encoded into page_flags for qemu emulation.
158 *
159 * Pages that are executable by the guest will never be executed
160 * by the host, but the host will need to be able to read them.
161 */
static int target_to_host_prot(int prot)
163 {
164 return (prot & (PROT_READ | PROT_WRITE)) |
165 (prot & PROT_EXEC ? PROT_READ : 0);
166 }
167
168 /* Target bits to be cleared by mprotect if not present in target_prot. */
169 #ifdef TARGET_AARCH64
170 #define TARGET_PAGE_NOTSTICKY PAGE_BTI
171 #else
172 #define TARGET_PAGE_NOTSTICKY 0
173 #endif
174
175 /* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
177 {
178 int host_page_size = qemu_real_host_page_size();
179 abi_ulong starts[3];
180 abi_ulong lens[3];
181 int prots[3];
182 abi_ulong host_start, host_last, last;
183 int prot1, ret, page_flags, nranges;
184
185 trace_target_mprotect(start, len, target_prot);
186
187 if ((start & ~TARGET_PAGE_MASK) != 0) {
188 return -TARGET_EINVAL;
189 }
190 page_flags = validate_prot_to_pageflags(target_prot);
191 if (!page_flags) {
192 return -TARGET_EINVAL;
193 }
194 if (len == 0) {
195 return 0;
196 }
197 len = TARGET_PAGE_ALIGN(len);
198 if (!guest_range_valid_untagged(start, len)) {
199 return -TARGET_ENOMEM;
200 }
201
202 last = start + len - 1;
203 host_start = start & -host_page_size;
204 host_last = ROUND_UP(last, host_page_size) - 1;
205 nranges = 0;
206
207 mmap_lock();
208
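    /*
     * With a host page size larger than the target page size, the first
     * and last host pages may also contain guest pages outside the
     * requested range.  Such boundary pages keep the union of the old
     * and new protections, so at most three host ranges are produced.
     */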
209 if (host_last - host_start < host_page_size) {
210 /* Single host page contains all guest pages: sum the prot. */
211 prot1 = target_prot;
212 for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
213 prot1 |= page_get_flags(a);
214 }
215 for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
216 prot1 |= page_get_flags(a + 1);
217 }
218 starts[nranges] = host_start;
219 lens[nranges] = host_page_size;
220 prots[nranges] = prot1;
221 nranges++;
222 } else {
223 if (host_start < start) {
224 /* Host page contains more than one guest page: sum the prot. */
225 prot1 = target_prot;
226 for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
227 prot1 |= page_get_flags(a);
228 }
229 /* If the resulting sum differs, create a new range. */
230 if (prot1 != target_prot) {
231 starts[nranges] = host_start;
232 lens[nranges] = host_page_size;
233 prots[nranges] = prot1;
234 nranges++;
235 host_start += host_page_size;
236 }
237 }
238
239 if (last < host_last) {
240 /* Host page contains more than one guest page: sum the prot. */
241 prot1 = target_prot;
242 for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
243 prot1 |= page_get_flags(a + 1);
244 }
245 /* If the resulting sum differs, create a new range. */
246 if (prot1 != target_prot) {
247 host_last -= host_page_size;
248 starts[nranges] = host_last + 1;
249 lens[nranges] = host_page_size;
250 prots[nranges] = prot1;
251 nranges++;
252 }
253 }
254
255 /* Create a range for the middle, if any remains. */
256 if (host_start < host_last) {
257 starts[nranges] = host_start;
258 lens[nranges] = host_last - host_start + 1;
259 prots[nranges] = target_prot;
260 nranges++;
261 }
262 }
263
264 for (int i = 0; i < nranges; ++i) {
265 ret = mprotect(g2h_untagged(starts[i]), lens[i],
266 target_to_host_prot(prots[i]));
267 if (ret != 0) {
268 goto error;
269 }
270 }
271
272 page_set_flags(start, last, page_flags, PAGE_RWX | TARGET_PAGE_NOTSTICKY);
273 ret = 0;
274
275 error:
276 mmap_unlock();
277 return ret;
278 }
279
280 /*
281 * Perform munmap on behalf of the target, with host parameters.
282 * If reserved_va, we must replace the memory reservation.
283 */
static int do_munmap(void *addr, size_t len)
285 {
286 if (reserved_va) {
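        /*
         * Replace the hole with a PROT_NONE reservation so that the
         * address range remains owned by QEMU's pre-reserved guest
         * address space.
         */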
287 void *ptr = mmap(addr, len, PROT_NONE,
288 MAP_FIXED | MAP_ANONYMOUS
289 | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
290 return ptr == addr ? 0 : -1;
291 }
292 return munmap(addr, len);
293 }
294
295 /*
296 * Perform a pread on behalf of target_mmap. We can reach EOF, we can be
297 * interrupted by signals, and in general there's no good error return path.
298 * If @zero, zero the rest of the block at EOF.
299 * Return true on success.
300 */
static bool mmap_pread(int fd, void *p, size_t len, off_t offset, bool zero)
302 {
303 while (1) {
304 ssize_t r = pread(fd, p, len, offset);
305
306 if (likely(r == len)) {
307 /* Complete */
308 return true;
309 }
310 if (r == 0) {
311 /* EOF */
312 if (zero) {
313 memset(p, 0, len);
314 }
315 return true;
316 }
317 if (r > 0) {
318 /* Short read */
319 p += r;
320 len -= r;
321 offset += r;
322 } else if (errno != EINTR) {
323 /* Error */
324 return false;
325 }
326 }
327 }
328
329 /*
330 * Map an incomplete host page.
331 *
332 * Here be dragons. This case will not work if there is an existing
333 * overlapping host page, which is file mapped, and for which the mapping
334 * is beyond the end of the file. In that case, we will see SIGBUS when
335 * trying to write a portion of this page.
336 *
337 * FIXME: Work around this with a temporary signal handler and longjmp.
338 */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
341 {
342 int host_page_size = qemu_real_host_page_size();
343 abi_ulong real_last;
344 void *host_start;
345 int prot_old, prot_new;
346 int host_prot_old, host_prot_new;
347
348 if (!(flags & MAP_ANONYMOUS)
349 && (flags & MAP_TYPE) == MAP_SHARED
350 && (prot & PROT_WRITE)) {
351 /*
352 * msync() won't work with the partial page, so we return an
353 * error if write is possible while it is a shared mapping.
354 */
355 errno = EINVAL;
356 return false;
357 }
358
359 real_last = real_start + host_page_size - 1;
360 host_start = g2h_untagged(real_start);
361
362 /* Get the protection of the target pages outside the mapping. */
363 prot_old = 0;
364 for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
365 prot_old |= page_get_flags(a);
366 }
367 for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
368 prot_old |= page_get_flags(a);
369 }
370
371 if (prot_old == 0) {
372 /*
373 * Since !(prot_old & PAGE_VALID), there were no guest pages
374 * outside of the fragment we need to map. Allocate a new host
375 * page to cover, discarding whatever else may have been present.
376 */
377 void *p = mmap(host_start, host_page_size,
378 target_to_host_prot(prot),
379 flags | MAP_ANONYMOUS, -1, 0);
380 if (p != host_start) {
381 if (p != MAP_FAILED) {
382 do_munmap(p, host_page_size);
383 errno = EEXIST;
384 }
385 return false;
386 }
387 prot_old = prot;
388 }
389 prot_new = prot | prot_old;
390
391 host_prot_old = target_to_host_prot(prot_old);
392 host_prot_new = target_to_host_prot(prot_new);
393
394 /* Adjust protection to be able to write. */
395 if (!(host_prot_old & PROT_WRITE)) {
396 host_prot_old |= PROT_WRITE;
397 mprotect(host_start, host_page_size, host_prot_old);
398 }
399
400 /* Read or zero the new guest pages. */
401 if (flags & MAP_ANONYMOUS) {
402 memset(g2h_untagged(start), 0, last - start + 1);
403 } else if (!mmap_pread(fd, g2h_untagged(start), last - start + 1,
404 offset, true)) {
405 return false;
406 }
407
408 /* Put final protection */
409 if (host_prot_new != host_prot_old) {
410 mprotect(host_start, host_page_size, host_prot_new);
411 }
412 return true;
413 }
414
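/*
 * Guest address-space layout parameters.  task_unmapped_base and
 * mmap_next_start steer where mmap_find_vma() places new mappings;
 * elf_et_dyn_base is defined here for use by other user-mode code.
 */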
415 abi_ulong task_unmapped_base;
416 abi_ulong elf_et_dyn_base;
417 abi_ulong mmap_next_start;
418
419 /*
420 * Subroutine of mmap_find_vma, used when we have pre-allocated
421 * a chunk of guest address space.
422 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
425 {
426 target_ulong ret;
427
428 ret = page_find_range_empty(start, reserved_va, size, align);
429 if (ret == -1 && start > mmap_min_addr) {
430 /* Restart at the beginning of the address space. */
431 ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
432 }
433
434 return ret;
435 }
436
437 /*
438 * Find and reserve a free memory area of size 'size'. The search
439 * starts at 'start'.
440 * It must be called with mmap_lock() held.
441 * Return -1 if error.
442 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
444 {
445 int host_page_size = qemu_real_host_page_size();
446 void *ptr, *prev;
447 abi_ulong addr;
448 int wrapped, repeat;
449
450 align = MAX(align, host_page_size);
451
452 /* If 'start' == 0, then a default start address is used. */
453 if (start == 0) {
454 start = mmap_next_start;
455 } else {
456 start &= -host_page_size;
457 }
458 start = ROUND_UP(start, align);
459 size = ROUND_UP(size, host_page_size);
460
461 if (reserved_va) {
462 return mmap_find_vma_reserved(start, size, align);
463 }
464
465 addr = start;
466 wrapped = repeat = 0;
467 prev = 0;
468
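    /*
     * Probe the host with PROT_NONE mappings until a suitably aligned
     * block inside the guest address space is found.  'repeat' counts
     * consecutive identical results and 'wrapped' records a restart
     * from low memory; both drive the retry heuristics below.
     */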
469 for (;; prev = ptr) {
470 /*
471 * Reserve needed memory area to avoid a race.
472 * It should be discarded using:
473 * - mmap() with MAP_FIXED flag
474 * - mremap() with MREMAP_FIXED flag
475 * - shmat() with SHM_REMAP flag
476 */
477 ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
478 MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
479
        /* ENOMEM, if the host address space has no memory */
481 if (ptr == MAP_FAILED) {
482 return (abi_ulong)-1;
483 }
484
485 /*
486 * Count the number of sequential returns of the same address.
487 * This is used to modify the search algorithm below.
488 */
489 repeat = (ptr == prev ? repeat + 1 : 0);
490
491 if (h2g_valid(ptr + size - 1)) {
492 addr = h2g(ptr);
493
494 if ((addr & (align - 1)) == 0) {
495 /* Success. */
496 if (start == mmap_next_start && addr >= task_unmapped_base) {
497 mmap_next_start = addr + size;
498 }
499 return addr;
500 }
501
502 /* The address is not properly aligned for the target. */
503 switch (repeat) {
504 case 0:
505 /*
506 * Assume the result that the kernel gave us is the
507 * first with enough free space, so start again at the
508 * next higher target page.
509 */
510 addr = ROUND_UP(addr, align);
511 break;
512 case 1:
513 /*
514 * Sometimes the kernel decides to perform the allocation
515 * at the top end of memory instead.
516 */
517 addr &= -align;
518 break;
519 case 2:
520 /* Start over at low memory. */
521 addr = 0;
522 break;
523 default:
                /* Fail. This unaligned block must be the last. */
525 addr = -1;
526 break;
527 }
528 } else {
529 /*
             * Since the result the kernel gave didn't fit in the guest
             * address space, start again at low memory.  If this
             * repeats, fail.
532 */
533 addr = (repeat ? -1 : 0);
534 }
535
536 /* Unmap and try again. */
537 munmap(ptr, size);
538
539 /* ENOMEM if we checked the whole of the target address space. */
540 if (addr == (abi_ulong)-1) {
541 return (abi_ulong)-1;
542 } else if (addr == 0) {
543 if (wrapped) {
544 return (abi_ulong)-1;
545 }
546 wrapped = 1;
547 /*
548 * Don't actually use 0 when wrapping, instead indicate
549 * that we'd truly like an allocation in low memory.
550 */
551 addr = (mmap_min_addr > TARGET_PAGE_SIZE
552 ? TARGET_PAGE_ALIGN(mmap_min_addr)
553 : TARGET_PAGE_SIZE);
554 } else if (wrapped && addr >= start) {
555 return (abi_ulong)-1;
556 }
557 }
558 }
559
560 /*
561 * Record a successful mmap within the user-exec interval tree.
562 */
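/*
 * Pages in [passthrough_start, passthrough_last] have identical host
 * and guest semantics and are marked PAGE_PASSTHROUGH; callers pass an
 * empty range (start > last) when the mapping had to be emulated with
 * anonymous memory or page fragments.
 */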
static abi_long mmap_end(abi_ulong start, abi_ulong last,
                         abi_ulong passthrough_start,
                         abi_ulong passthrough_last,
                         int flags, int page_flags)
567 {
568 if (flags & MAP_ANONYMOUS) {
569 page_flags |= PAGE_ANON;
570 }
571 if (passthrough_start > passthrough_last) {
572 page_set_flags(start, last, page_flags, PAGE_VALID);
573 } else {
574 if (start < passthrough_start) {
575 page_set_flags(start, passthrough_start - 1,
576 page_flags, PAGE_VALID);
577 }
578 page_set_flags(passthrough_start, passthrough_last,
579 page_flags | PAGE_PASSTHROUGH, PAGE_VALID);
580 if (passthrough_last < last) {
581 page_set_flags(passthrough_last + 1, last, page_flags, PAGE_VALID);
582 }
583 }
584 shm_region_rm_complete(start, last);
585 trace_target_mmap_complete(start);
586 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
587 FILE *f = qemu_log_trylock();
588 if (f) {
589 fprintf(f, "page layout changed following mmap\n");
590 page_dump(f);
591 qemu_log_unlock(f);
592 }
593 }
594 return start;
595 }
596
597 /*
598 * Special case host page size == target page size,
599 * where there are no edge conditions.
600 */
static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
                            int host_prot, int flags, int page_flags,
                            int fd, off_t offset)
604 {
605 void *p, *want_p = NULL;
606 abi_ulong last;
607
608 if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
609 want_p = g2h_untagged(start);
610 }
611
612 p = mmap(want_p, len, host_prot, flags, fd, offset);
613 if (p == MAP_FAILED) {
614 return -1;
615 }
616 /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */
617 if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) {
618 do_munmap(p, len);
619 errno = EEXIST;
620 return -1;
621 }
622
623 start = h2g(p);
624 last = start + len - 1;
625 return mmap_end(start, last, start, last, flags, page_flags);
626 }
627
628 /*
629 * Special case host page size < target page size.
630 *
631 * The two special cases are increased guest alignment, and mapping
632 * past the end of a file.
633 *
634 * When mapping files into a memory area larger than the file,
635 * accesses to pages beyond the file size will cause a SIGBUS.
636 *
 * For example, if mmapping a file of 100 bytes on a host with 4K
638 * pages emulating a target with 8K pages, the target expects to
639 * be able to access the first 8K. But the host will trap us on
640 * any access beyond 4K.
641 *
 * When emulating a target with a larger page size than the host's,
 * we may need to truncate file maps at EOF and add extra anonymous
 * pages up to the target's page boundary.
645 *
646 * This workaround only works for files that do not change.
647 * If the file is later extended (e.g. ftruncate), the SIGBUS
648 * vanishes and the proper behaviour is that changes within the
649 * anon page should be reflected in the file.
650 *
651 * However, this case is rather common with executable images,
652 * so the workaround is important for even trivial tests, whereas
653 * the mmap of a file being extended is less common.
654 */
static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
                            int mmap_flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
658 {
659 void *p, *want_p = NULL;
660 off_t fileend_adj = 0;
661 int flags = mmap_flags;
662 abi_ulong last, pass_last;
663
664 if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
665 want_p = g2h_untagged(start);
666 }
667
668 if (!(flags & MAP_ANONYMOUS)) {
669 struct stat sb;
670
671 if (fstat(fd, &sb) == -1) {
672 return -1;
673 }
674 if (offset >= sb.st_size) {
675 /*
676 * The entire map is beyond the end of the file.
677 * Transform it to an anonymous mapping.
678 */
679 flags |= MAP_ANONYMOUS;
680 fd = -1;
681 offset = 0;
682 } else if (offset + len > sb.st_size) {
683 /*
684 * A portion of the map is beyond the end of the file.
685 * Truncate the file portion of the allocation.
686 */
687 fileend_adj = offset + len - sb.st_size;
688 }
689 }
690
691 if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
692 if (fileend_adj) {
693 p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
694 } else {
695 p = mmap(want_p, len, host_prot, flags, fd, offset);
696 }
697 if (p != want_p) {
698 if (p != MAP_FAILED) {
699 /* Host does not support MAP_FIXED_NOREPLACE: emulate. */
700 do_munmap(p, len);
701 errno = EEXIST;
702 }
703 return -1;
704 }
705
706 if (fileend_adj) {
707 void *t = mmap(p, len - fileend_adj, host_prot,
708 (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
709 fd, offset);
710
711 if (t == MAP_FAILED) {
712 int save_errno = errno;
713
714 /*
715 * We failed a map over the top of the successful anonymous
716 * mapping above. The only failure mode is running out of VMAs,
717 * and there's nothing that we can do to detect that earlier.
718 * If we have replaced an existing mapping with MAP_FIXED,
719 * then we cannot properly recover. It's a coin toss whether
720 * it would be better to exit or continue here.
721 */
722 if (!(flags & MAP_FIXED_NOREPLACE) &&
723 !page_check_range_empty(start, start + len - 1)) {
724 qemu_log("QEMU target_mmap late failure: %s",
725 strerror(save_errno));
726 }
727
728 do_munmap(want_p, len);
729 errno = save_errno;
730 return -1;
731 }
732 }
733 } else {
734 size_t host_len, part_len;
735
736 /*
737 * Take care to align the host memory. Perform a larger anonymous
738 * allocation and extract the aligned portion. Remap the file on
739 * top of that.
740 */
741 host_len = len + TARGET_PAGE_SIZE - host_page_size;
742 p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
743 if (p == MAP_FAILED) {
744 return -1;
745 }
746
747 part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
748 if (part_len) {
749 part_len = TARGET_PAGE_SIZE - part_len;
750 do_munmap(p, part_len);
751 p += part_len;
752 host_len -= part_len;
753 }
754 if (len < host_len) {
755 do_munmap(p + len, host_len - len);
756 }
757
758 if (!(flags & MAP_ANONYMOUS)) {
759 void *t = mmap(p, len - fileend_adj, host_prot,
760 flags | MAP_FIXED, fd, offset);
761
762 if (t == MAP_FAILED) {
763 int save_errno = errno;
764 do_munmap(p, len);
765 errno = save_errno;
766 return -1;
767 }
768 }
769
770 start = h2g(p);
771 }
772
773 last = start + len - 1;
774 if (fileend_adj) {
775 pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
776 } else {
777 pass_last = last;
778 }
779 return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
780 }
781
782 /*
783 * Special case host page size > target page size.
784 *
 * The two special cases are addresses and file offsets that are valid
 * for the guest but cannot be directly represented by the host.
787 */
static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len,
                            int target_prot, int host_prot,
                            int flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
792 {
793 void *p, *want_p = NULL;
794 off_t host_offset = offset & -host_page_size;
795 abi_ulong last, real_start, real_last;
796 bool misaligned_offset = false;
797 size_t host_len;
798
799 if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
800 want_p = g2h_untagged(start);
801 }
802
803 if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
804 /*
805 * Adjust the offset to something representable on the host.
806 */
807 host_len = len + offset - host_offset;
808 p = mmap(want_p, host_len, host_prot, flags, fd, host_offset);
809 if (p == MAP_FAILED) {
810 return -1;
811 }
812
813 /* Update start to the file position at offset. */
814 p += offset - host_offset;
815
816 start = h2g(p);
817 last = start + len - 1;
818 return mmap_end(start, last, start, last, flags, page_flags);
819 }
820
821 if (!(flags & MAP_ANONYMOUS)) {
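        /*
         * start and offset must be congruent modulo the host page size,
         * otherwise the host mmap() cannot place the file contents at
         * the required guest address.
         */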
822 misaligned_offset = (start ^ offset) & (host_page_size - 1);
823
824 /*
825 * The fallback for misalignment is a private mapping + read.
         * This carries none of the semantics required of MAP_SHARED.
827 */
828 if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) {
829 errno = EINVAL;
830 return -1;
831 }
832 }
833
834 last = start + len - 1;
835 real_start = start & -host_page_size;
836 real_last = ROUND_UP(last, host_page_size) - 1;
837
838 /*
839 * Handle the start and end of the mapping.
840 */
841 if (real_start < start) {
842 abi_ulong real_page_last = real_start + host_page_size - 1;
843 if (last <= real_page_last) {
            /* Entire allocation is a subset of one host page. */
845 if (!mmap_frag(real_start, start, last, target_prot,
846 flags, fd, offset)) {
847 return -1;
848 }
849 return mmap_end(start, last, -1, 0, flags, page_flags);
850 }
851
852 if (!mmap_frag(real_start, start, real_page_last, target_prot,
853 flags, fd, offset)) {
854 return -1;
855 }
856 real_start = real_page_last + 1;
857 }
858
859 if (last < real_last) {
860 abi_ulong real_page_start = real_last - host_page_size + 1;
861 if (!mmap_frag(real_page_start, real_page_start, last,
862 target_prot, flags, fd,
863 offset + real_page_start - start)) {
864 return -1;
865 }
866 real_last = real_page_start - 1;
867 }
868
869 if (real_start > real_last) {
870 return mmap_end(start, last, -1, 0, flags, page_flags);
871 }
872
873 /*
874 * Handle the middle of the mapping.
875 */
876
877 host_len = real_last - real_start + 1;
878 want_p += real_start - start;
879
880 if (flags & MAP_ANONYMOUS) {
881 p = mmap(want_p, host_len, host_prot, flags, -1, 0);
882 } else if (!misaligned_offset) {
883 p = mmap(want_p, host_len, host_prot, flags, fd,
884 offset + real_start - start);
885 } else {
886 p = mmap(want_p, host_len, host_prot | PROT_WRITE,
887 flags | MAP_ANONYMOUS, -1, 0);
888 }
889 if (p != want_p) {
890 if (p != MAP_FAILED) {
891 do_munmap(p, host_len);
892 errno = EEXIST;
893 }
894 return -1;
895 }
896
897 if (misaligned_offset) {
898 if (!mmap_pread(fd, p, host_len, offset + real_start - start, false)) {
899 do_munmap(p, host_len);
900 return -1;
901 }
902 if (!(host_prot & PROT_WRITE)) {
903 mprotect(p, host_len, host_prot);
904 }
905 }
906
907 return mmap_end(start, last, -1, 0, flags, page_flags);
908 }
909
static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
                                    int target_prot, int flags, int page_flags,
                                    int fd, off_t offset)
913 {
914 int host_page_size = qemu_real_host_page_size();
915 int host_prot;
916
917 /*
918 * For reserved_va, we are in full control of the allocation.
919 * Find a suitable hole and convert to MAP_FIXED.
920 */
921 if (reserved_va) {
922 if (flags & MAP_FIXED_NOREPLACE) {
923 /* Validate that the chosen range is empty. */
924 if (!page_check_range_empty(start, start + len - 1)) {
925 errno = EEXIST;
926 return -1;
927 }
928 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
929 } else if (!(flags & MAP_FIXED)) {
930 abi_ulong real_start = start & -host_page_size;
931 off_t host_offset = offset & -host_page_size;
932 size_t real_len = len + offset - host_offset;
933 abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE);
934
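            /*
             * Search with host-page-aligned bounds, then shift the
             * result so that start stays congruent with the file
             * offset modulo the host page size.
             */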
935 start = mmap_find_vma(real_start, real_len, align);
936 if (start == (abi_ulong)-1) {
937 errno = ENOMEM;
938 return -1;
939 }
940 start += offset - host_offset;
941 flags |= MAP_FIXED;
942 }
943 }
944
945 host_prot = target_to_host_prot(target_prot);
946
947 if (host_page_size == TARGET_PAGE_SIZE) {
948 return mmap_h_eq_g(start, len, host_prot, flags,
949 page_flags, fd, offset);
950 } else if (host_page_size < TARGET_PAGE_SIZE) {
951 return mmap_h_lt_g(start, len, host_prot, flags,
952 page_flags, fd, offset, host_page_size);
953 } else {
954 return mmap_h_gt_g(start, len, target_prot, host_prot, flags,
955 page_flags, fd, offset, host_page_size);
956 }
957 }
958
959 /* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
962 {
963 abi_long ret;
964 int page_flags;
965
966 trace_target_mmap(start, len, target_prot, flags, fd, offset);
967
968 if (!len) {
969 errno = EINVAL;
970 return -1;
971 }
972
973 page_flags = validate_prot_to_pageflags(target_prot);
974 if (!page_flags) {
975 errno = EINVAL;
976 return -1;
977 }
978
979 /* Also check for overflows... */
980 len = TARGET_PAGE_ALIGN(len);
981 if (!len || len != (size_t)len) {
982 errno = ENOMEM;
983 return -1;
984 }
985
986 if (offset & ~TARGET_PAGE_MASK) {
987 errno = EINVAL;
988 return -1;
989 }
990 if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
991 if (start & ~TARGET_PAGE_MASK) {
992 errno = EINVAL;
993 return -1;
994 }
995 if (!guest_range_valid_untagged(start, len)) {
996 errno = ENOMEM;
997 return -1;
998 }
999 }
1000
1001 mmap_lock();
1002
1003 ret = target_mmap__locked(start, len, target_prot, flags,
1004 page_flags, fd, offset);
1005
1006 mmap_unlock();
1007
1008 /*
1009 * If we're mapping shared memory, ensure we generate code for parallel
1010 * execution and flush old translations. This will work up to the level
1011 * supported by the host -- anything that requires EXCP_ATOMIC will not
1012 * be atomic with respect to an external process.
1013 */
1014 if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
1015 begin_parallel_context(thread_cpu);
1016 }
1017
1018 return ret;
1019 }
1020
static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
1022 {
1023 int host_page_size = qemu_real_host_page_size();
1024 abi_ulong real_start;
1025 abi_ulong real_last;
1026 abi_ulong real_len;
1027 abi_ulong last;
1028 abi_ulong a;
1029 void *host_start;
1030 int prot;
1031
1032 last = ROUND_UP(start + len, TARGET_PAGE_SIZE) - 1;
1033 real_start = start & -host_page_size;
1034 real_last = ROUND_UP(last + 1, host_page_size) - 1;
1035
1036 /*
1037 * If guest pages remain on the first or last host pages,
1038 * adjust the deallocation to retain those guest pages.
1039 * The single page special case is required for the last page,
1040 * lest real_start overflow to zero.
1041 */
1042 if (real_last - real_start < host_page_size) {
1043 prot = 0;
1044 for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
1045 prot |= page_get_flags(a);
1046 }
1047 for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
1048 prot |= page_get_flags(a + 1);
1049 }
1050 if (prot != 0) {
1051 return 0;
1052 }
1053 } else {
1054 for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
1055 prot |= page_get_flags(a);
1056 }
1057 if (prot != 0) {
1058 real_start += host_page_size;
1059 }
1060
1061 for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
1062 prot |= page_get_flags(a + 1);
1063 }
1064 if (prot != 0) {
1065 real_last -= host_page_size;
1066 }
1067
1068 if (real_last < real_start) {
1069 return 0;
1070 }
1071 }
1072
1073 real_len = real_last - real_start + 1;
1074 host_start = g2h_untagged(real_start);
1075
1076 return do_munmap(host_start, real_len);
1077 }
1078
int target_munmap(abi_ulong start, abi_ulong len)
1080 {
1081 int ret;
1082
1083 trace_target_munmap(start, len);
1084
1085 if (start & ~TARGET_PAGE_MASK) {
1086 errno = EINVAL;
1087 return -1;
1088 }
1089 len = TARGET_PAGE_ALIGN(len);
1090 if (len == 0 || !guest_range_valid_untagged(start, len)) {
1091 errno = EINVAL;
1092 return -1;
1093 }
1094
1095 mmap_lock();
1096 ret = mmap_reserve_or_unmap(start, len);
1097 if (likely(ret == 0)) {
1098 page_set_flags(start, start + len - 1, 0, PAGE_VALID);
1099 shm_region_rm_complete(start, start + len - 1);
1100 }
1101 mmap_unlock();
1102
1103 return ret;
1104 }
1105
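/*
 * Perform mremap() on behalf of the target.  With reserved_va, any
 * range vacated by a move or shrink is re-reserved rather than
 * unmapped, and a new mapping that falls outside the guest address
 * space is reverted.
 */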
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
1109 {
1110 int prot;
1111 void *host_addr;
1112
1113 if (((flags & MREMAP_FIXED) &&
1114 !guest_range_valid_untagged(new_addr, new_size)) ||
1115 ((flags & MREMAP_MAYMOVE) == 0 &&
1116 !guest_range_valid_untagged(old_addr, new_size))) {
1117 errno = EINVAL;
1118 return -1;
1119 }
1120 if (!guest_range_valid_untagged(old_addr, old_size)) {
1121 errno = EFAULT;
1122 return -1;
1123 }
1124
1125 mmap_lock();
1126
1127 if (flags & MREMAP_FIXED) {
1128 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
1129 flags, g2h_untagged(new_addr));
1130
1131 if (reserved_va && host_addr != MAP_FAILED) {
1132 /*
1133 * If new and old addresses overlap then the above mremap will
1134 * already have failed with EINVAL.
1135 */
1136 mmap_reserve_or_unmap(old_addr, old_size);
1137 }
1138 } else if (flags & MREMAP_MAYMOVE) {
1139 abi_ulong mmap_start;
1140
1141 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
1142
1143 if (mmap_start == -1) {
1144 errno = ENOMEM;
1145 host_addr = MAP_FAILED;
1146 } else {
1147 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
1148 flags | MREMAP_FIXED,
1149 g2h_untagged(mmap_start));
1150 if (reserved_va) {
1151 mmap_reserve_or_unmap(old_addr, old_size);
1152 }
1153 }
1154 } else {
1155 int page_flags = 0;
1156 if (reserved_va && old_size < new_size) {
1157 abi_ulong addr;
1158 for (addr = old_addr + old_size;
1159 addr < old_addr + new_size;
1160 addr++) {
1161 page_flags |= page_get_flags(addr);
1162 }
1163 }
1164 if (page_flags == 0) {
1165 host_addr = mremap(g2h_untagged(old_addr),
1166 old_size, new_size, flags);
1167
1168 if (host_addr != MAP_FAILED) {
1169 /* Check if address fits target address space */
1170 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
1171 /* Revert mremap() changes */
1172 host_addr = mremap(g2h_untagged(old_addr),
1173 new_size, old_size, flags);
1174 errno = ENOMEM;
1175 host_addr = MAP_FAILED;
1176 } else if (reserved_va && old_size > new_size) {
1177 /* Re-reserve pages we just shrunk out of the mapping */
1178 mmap_reserve_or_unmap(old_addr + new_size,
1179 old_size - new_size);
1180 }
1181 }
1182 } else {
1183 errno = ENOMEM;
1184 host_addr = MAP_FAILED;
1185 }
1186 }
1187
1188 if (host_addr == MAP_FAILED) {
1189 new_addr = -1;
1190 } else {
1191 new_addr = h2g(host_addr);
1192 prot = page_get_flags(old_addr);
1193 page_set_flags(old_addr, old_addr + old_size - 1, 0, PAGE_VALID);
1194 shm_region_rm_complete(old_addr, old_addr + old_size - 1);
1195 page_set_flags(new_addr, new_addr + new_size - 1,
1196 prot | PAGE_VALID, PAGE_VALID);
1197 shm_region_rm_complete(new_addr, new_addr + new_size - 1);
1198 }
1199 mmap_unlock();
1200 return new_addr;
1201 }
1202
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
1204 {
1205 abi_ulong len;
1206 int ret = 0;
1207
1208 if (start & ~TARGET_PAGE_MASK) {
1209 return -TARGET_EINVAL;
1210 }
1211 if (len_in == 0) {
1212 return 0;
1213 }
1214 len = TARGET_PAGE_ALIGN(len_in);
1215 if (len == 0 || !guest_range_valid_untagged(start, len)) {
1216 return -TARGET_EINVAL;
1217 }
1218
1219 /* Translate for some architectures which have different MADV_xxx values */
1220 switch (advice) {
1221 case TARGET_MADV_DONTNEED: /* alpha */
1222 advice = MADV_DONTNEED;
1223 break;
1224 case TARGET_MADV_WIPEONFORK: /* parisc */
1225 advice = MADV_WIPEONFORK;
1226 break;
1227 case TARGET_MADV_KEEPONFORK: /* parisc */
1228 advice = MADV_KEEPONFORK;
1229 break;
1230 /* we do not care about the other MADV_xxx values yet */
1231 }
1232
1233 /*
1234 * Most advice values are hints, so ignoring and returning success is ok.
1235 *
1236 * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
1237 * MADV_KEEPONFORK are not hints and need to be emulated.
1238 *
1239 * A straight passthrough for those may not be safe because qemu sometimes
1240 * turns private file-backed mappings into anonymous mappings.
1241 * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
1242 * same semantics for the host as for the guest.
1243 *
1244 * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
1245 * return failure if not.
1246 *
1247 * MADV_DONTNEED is passed through as well, if possible.
1248 * If passthrough isn't possible, we nevertheless (wrongly!) return
1249 * success, which is broken but some userspace programs fail to work
1250 * otherwise. Completely implementing such emulation is quite complicated
1251 * though.
1252 */
1253 mmap_lock();
1254 switch (advice) {
1255 case MADV_DONTDUMP:
1256 page_set_flags(start, start + len - 1, PAGE_DONTDUMP, 0);
1257 break;
1258 case MADV_DODUMP:
1259 page_set_flags(start, start + len - 1, 0, PAGE_DONTDUMP);
1260 break;
1261 case MADV_WIPEONFORK:
1262 case MADV_KEEPONFORK:
1263 ret = -EINVAL;
1264 /* fall through */
1265 case MADV_DONTNEED:
1266 if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
1267 ret = get_errno(madvise(g2h_untagged(start), len, advice));
1268 if ((advice == MADV_DONTNEED) && (ret == 0)) {
1269 page_reset_target_data(start, start + len - 1);
1270 }
1271 }
1272 }
1273 mmap_unlock();
1274
1275 return ret;
1276 }
1277
1278 #ifndef TARGET_FORCE_SHMLBA
1279 /*
1280 * For most architectures, SHMLBA is the same as the page size;
1281 * some architectures have larger values, in which case they should
1282 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
1283 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
1284 * and defining its own value for SHMLBA.
1285 *
1286 * The kernel also permits SHMLBA to be set by the architecture to a
1287 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
1288 * this means that addresses are rounded to the large size if
1289 * SHM_RND is set but addresses not aligned to that size are not rejected
1290 * as long as they are at least page-aligned. Since the only architecture
1291 * which uses this is ia64 this code doesn't provide for that oddity.
1292 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
1294 {
1295 return TARGET_PAGE_SIZE;
1296 }
1297 #endif
1298
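/*
 * Host architectures on which SHMLBA may exceed the page size, so that
 * host shmat() placement must honour SHMLBA rather than mere page
 * alignment.
 */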
1299 #if defined(__arm__) || defined(__mips__) || defined(__sparc__)
1300 #define HOST_FORCE_SHMLBA 1
1301 #else
1302 #define HOST_FORCE_SHMLBA 0
1303 #endif
1304
abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
1307 {
1308 CPUState *cpu = env_cpu(cpu_env);
1309 struct shmid_ds shm_info;
1310 int ret;
1311 int h_pagesize;
1312 int t_shmlba, h_shmlba, m_shmlba;
1313 size_t t_len, h_len, m_len;
1314
1315 /* shmat pointers are always untagged */
1316
1317 /*
1318 * Because we can't use host shmat() unless the address is sufficiently
1319 * aligned for the host, we'll need to check both.
1320 * TODO: Could be fixed with softmmu.
1321 */
1322 t_shmlba = target_shmlba(cpu_env);
1323 h_pagesize = qemu_real_host_page_size();
1324 h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
1325 m_shmlba = MAX(t_shmlba, h_shmlba);
1326
1327 if (shmaddr) {
1328 if (shmaddr & (m_shmlba - 1)) {
1329 if (shmflg & SHM_RND) {
1330 /*
1331 * The guest is allowing the kernel to round the address.
1332 * Assume that the guest is ok with us rounding to the
1333 * host required alignment too. Anyway if we don't, we'll
1334 * get an error from the kernel.
1335 */
1336 shmaddr &= ~(m_shmlba - 1);
1337 if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
1338 return -TARGET_EINVAL;
1339 }
1340 } else {
1341 int require = TARGET_PAGE_SIZE;
1342 #ifdef TARGET_FORCE_SHMLBA
1343 require = t_shmlba;
1344 #endif
1345 /*
1346 * Include host required alignment, as otherwise we cannot
1347 * use host shmat at all.
1348 */
1349 require = MAX(require, h_shmlba);
1350 if (shmaddr & (require - 1)) {
1351 return -TARGET_EINVAL;
1352 }
1353 }
1354 }
1355 } else {
1356 if (shmflg & SHM_REMAP) {
1357 return -TARGET_EINVAL;
1358 }
1359 }
1360 /* All rounding now manually concluded. */
1361 shmflg &= ~SHM_RND;
1362
1363 /* Find out the length of the shared memory segment. */
1364 ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
1365 if (is_error(ret)) {
1366 /* can't get length, bail out */
1367 return ret;
1368 }
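    /*
     * Round the segment size for both page sizes: the guest sees whole
     * target pages and the host maps whole host pages, so the larger
     * extent governs the placement checks below.
     */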
1369 t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
1370 h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
1371 m_len = MAX(t_len, h_len);
1372
1373 if (!guest_range_valid_untagged(shmaddr, m_len)) {
1374 return -TARGET_EINVAL;
1375 }
1376
1377 WITH_MMAP_LOCK_GUARD() {
1378 bool mapped = false;
1379 void *want, *test;
1380 abi_ulong last;
1381
1382 if (!shmaddr) {
1383 shmaddr = mmap_find_vma(0, m_len, m_shmlba);
1384 if (shmaddr == -1) {
1385 return -TARGET_ENOMEM;
1386 }
1387 mapped = !reserved_va;
1388 } else if (shmflg & SHM_REMAP) {
1389 /*
1390 * If host page size > target page size, the host shmat may map
1391 * more memory than the guest expects. Reject a mapping that
1392 * would replace memory in the unexpected gap.
1393 * TODO: Could be fixed with softmmu.
1394 */
1395 if (t_len < h_len &&
1396 !page_check_range_empty(shmaddr + t_len,
1397 shmaddr + h_len - 1)) {
1398 return -TARGET_EINVAL;
1399 }
1400 } else {
1401 if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
1402 return -TARGET_EINVAL;
1403 }
1404 }
1405
1406 /* All placement is now complete. */
1407 want = (void *)g2h_untagged(shmaddr);
1408
1409 /*
1410 * Map anonymous pages across the entire range, then remap with
1411 * the shared memory. This is required for a number of corner
1412 * cases for which host and guest page sizes differ.
1413 */
1414 if (h_len != t_len) {
1415 int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
1416 int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
1417 | (reserved_va || mapped || (shmflg & SHM_REMAP)
1418 ? MAP_FIXED : MAP_FIXED_NOREPLACE);
1419
1420 test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
1421 if (unlikely(test != want)) {
1422 /* shmat returns EINVAL not EEXIST like mmap. */
1423 ret = (test == MAP_FAILED && errno != EEXIST
1424 ? get_errno(-1) : -TARGET_EINVAL);
1425 if (mapped) {
1426 do_munmap(want, m_len);
1427 }
1428 return ret;
1429 }
1430 mapped = true;
1431 }
1432
1433 if (reserved_va || mapped) {
1434 shmflg |= SHM_REMAP;
1435 }
1436 test = shmat(shmid, want, shmflg);
1437 if (test == MAP_FAILED) {
1438 ret = get_errno(-1);
1439 if (mapped) {
1440 do_munmap(want, m_len);
1441 }
1442 return ret;
1443 }
1444 assert(test == want);
1445
1446 last = shmaddr + m_len - 1;
1447 page_set_flags(shmaddr, last,
1448 PAGE_VALID | PAGE_READ |
1449 (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
1450 (shmflg & SHM_EXEC ? PAGE_EXEC : 0),
1451 PAGE_VALID);
1452
1453 shm_region_rm_complete(shmaddr, last);
1454 shm_region_add(shmaddr, last);
1455 }
1456
1457 /*
1458 * We're mapping shared memory, so ensure we generate code for parallel
1459 * execution and flush old translations. This will work up to the level
1460 * supported by the host -- anything that requires EXCP_ATOMIC will not
1461 * be atomic with respect to an external process.
1462 */
1463 begin_parallel_context(cpu);
1464
1465 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
1466 FILE *f = qemu_log_trylock();
1467 if (f) {
1468 fprintf(f, "page layout changed following shmat\n");
1469 page_dump(f);
1470 qemu_log_unlock(f);
1471 }
1472 }
1473 return shmaddr;
1474 }
1475
abi_long target_shmdt(abi_ulong shmaddr)
1477 {
1478 abi_long rv;
1479
1480 /* shmdt pointers are always untagged */
1481
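    /*
     * shmdt() is not told the size of the attachment; recover it from
     * the interval recorded by target_shmat().
     */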
1482 WITH_MMAP_LOCK_GUARD() {
1483 abi_ulong last = shm_region_find(shmaddr);
1484 if (last == 0) {
1485 return -TARGET_EINVAL;
1486 }
1487
1488 rv = get_errno(shmdt(g2h_untagged(shmaddr)));
1489 if (rv == 0) {
1490 abi_ulong size = last - shmaddr + 1;
1491
1492 page_set_flags(shmaddr, last, 0, PAGE_VALID);
1493 shm_region_rm_complete(shmaddr, last);
1494 mmap_reserve_or_unmap(shmaddr, size);
1495 }
1496 }
1497 return rv;
1498 }
1499