/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

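/*
 * Emulate the guest mprotect(2) syscall: validate the target protection
 * bits, apply the corresponding host protection (merging the protections
 * of guest pages that share a host page when the host page size is
 * larger than the target page size), and record the new flags in QEMU's
 * page tables.  Returns 0 on success and a negative value on failure.
 */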
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}

/*
 * Release the guest range [start, start + len).  With reserved_va, the
 * host range is re-reserved with PROT_NONE rather than unmapped, so that
 * the overall guest address space reservation stays intact.  Host pages
 * still shared with other guest pages are left untouched.
 */
static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite
     * complicated though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}