/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

/*
 * The mmap lock is recursive per thread: only the outermost mmap_lock()
 * and mmap_unlock() calls of a thread touch the mutex.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

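/*
 * When the host page size is larger than the target page size, the first
 * and last host pages of the range may also contain other guest pages.
 * Their existing protections are OR-ed into what is applied to those
 * host pages, so the operation splits into up to three host ranges.
 */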
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

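/*
 * When host pages are larger than target pages, the first or last host
 * page of a new mapping may still contain other, live guest pages.  Such
 * a host page cannot simply be mmap()ed over; the helper below fills in
 * only the requested fragment while preserving the contents and
 * protections of its neighbours.
 */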
/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

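/*
 * Map guest memory.  When no fixed address is requested, a suitable
 * region is found with mmap_find_vma().  With a fixed address, any host
 * pages only partially covered by the new mapping are populated via
 * mmap_frag() and only the host-page-aligned middle is mapped directly;
 * page_set_flags() then records the guest view, marking ranges that map
 * 1:1 onto the host as PAGE_PASSTHROUGH.
 */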
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
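    /* With page logging enabled, dump the updated guest memory layout. */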
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}

static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

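/*
 * Resize and/or move an existing guest mapping.  With reserved_va, any
 * guest range vacated by the operation is converted back into a
 * PROT_NONE reservation via mmap_reserve_or_unmap().
 */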
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail freed by shrinking in place. */
                    mmap_reserve_or_unmap(old_addr + new_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite
     * complicated though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}