/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

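/*
 * The mmap lock is recursive per thread: mmap_lock_count is thread-local,
 * so only the outermost mmap_lock()/mmap_unlock() pair in a given thread
 * actually takes or releases mmap_mutex.
 */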
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

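/*
 * TASK_UNMAPPED_BASE is the default base address at which mmap_find_vma()
 * starts searching for free guest address space when the guest does not
 * supply an address hint; mmap_next_start advances as regions are handed
 * out above that base.
 */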
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE 0x5500000000
#else
# define TASK_UNMAPPED_BASE (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE 0xfa000000
#else
# define TASK_UNMAPPED_BASE 0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles this
         * error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* Validate that the chosen range is empty. */
        if ((flags & MAP_FIXED_NOREPLACE)
            && !page_check_range_empty(start, last)) {
            errno = EEXIST;
            goto fail;
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p;
            off_t offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_last - real_start + 1,
                     target_to_host_prot(target_prot), flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
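    /*
     * Success: record the new range in the guest page flags.  Sub-ranges
     * that were mapped directly by the host mmap above, rather than
     * emulated by reading or zeroing into anonymous memory, are also
     * marked PAGE_PASSTHROUGH, i.e. the host mapping has the same
     * semantics as the guest mapping (see target_madvise).
     */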
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}

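/*
 * Release the host mapping backing the guest range [start, start + len).
 * Host pages at either end that still contain guest pages outside this
 * range are preserved.  When the guest address space is pre-reserved
 * (reserved_va), the range is replaced with a PROT_NONE mapping instead
 * of being truly unmapped, so it stays claimed for the guest.
 */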
static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

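/* Emulate the guest munmap(2): drop the host backing and clear the page flags. */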
int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

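/*
 * Emulate the guest mremap(2).  With a pre-reserved guest address space
 * (reserved_va), any region vacated by a move or a shrink is handed back
 * to mmap_reserve_or_unmap() so that it remains reserved for the guest.
 */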
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}