/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
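
/*
 * For example, a target_prot of PROT_READ | PROT_EXEC yields
 * *host_prot == PROT_READ only, since guest-executable pages are read
 * by the translator but never executed directly by the host, and a
 * return value of PAGE_VALID | PAGE_READ | PAGE_EXEC, so the guest's
 * view of the page as executable is still recorded in the page flags.
 */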

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len - 1, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
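
/*
 * Worked example for the fragment handling above (illustrative,
 * assuming 64K host pages and 4K target pages): changing the guest
 * range [0x11000, 0x13000) touches only part of the host page
 * [0x10000, 0x20000), so that host page is mprotect'ed with the union
 * of the new protection and the existing flags of the neighbouring
 * target pages that share it; only the per-target-page flags set by
 * page_set_flags() record the exact guest protection.
 */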

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    /*
     * Start at the top of the address space, ignoring the last page.
     * If reserved_va == UINT32_MAX, then end_addr wraps to 0,
     * throwing the rest of the calculations off.
     * TODO: rewrite using last_addr instead.
     * TODO: use the interval tree instead of probing every page.
     */
    if (start > reserved_va - size) {
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space (see above). */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
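
/*
 * Note on the restart step above: when a page at ADDR is found in use,
 * the new END_ADDR of ((addr - size) & -align) + size is at or below
 * ADDR, so the next candidate window lies entirely below the busy page
 * and the scan keeps moving downwards.  Wrapping past zero restarts
 * the scan from the aligned top at most once before the lookup fails.
 */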

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page. */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead. */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail. */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory. */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
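
/*
 * Callers (e.g. target_mmap() and target_mremap() below) are expected
 * to map over the returned area immediately with MAP_FIXED or
 * MREMAP_FIXED while still holding mmap_lock: in the non-reserved_va
 * case the area is only held by the temporary PROT_NONE mapping made
 * above, and in the reserved_va case by the global reservation, so
 * simply munmap'ing it would reopen the race this function avoids.
 */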

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below. */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary. */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF. */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }
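
    /*
     * For the !MAP_FIXED case below (illustrative): when host and
     * target page sizes match, offset is already host-page aligned,
     * so host_offset == offset and host_len == len.  Only when the
     * host page size exceeds the target's can host_offset differ from
     * offset, and then 'start' is advanced by the difference so that
     * it still corresponds to file offset 'offset'.
     */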

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address.  It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start,
                            real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
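
/*
 * Re-establish the PROT_NONE reservation over a range the guest has
 * given up, instead of unmapping it, so that the host cannot reuse
 * the addresses while reserved_va is in effect.  Host pages at either
 * end that still contain live target pages are left alone.
 */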
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}
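
/*
 * Guest mremap().  A MREMAP_MAYMOVE request is not left to the host to
 * place: the new location is chosen with mmap_find_vma() and forced
 * with MREMAP_FIXED so that it is guaranteed to lie within the guest
 * address space.  With reserved_va, any range released by moving or
 * shrinking the mapping is handed back to mmap_reserve() rather than
 * unmapped.
 */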
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL. */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}
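
/*
 * PAGE_PASSTHROUGH marks ranges that target_mmap() was able to map 1:1
 * onto the host rather than assemble from anonymous fragments or a
 * pread() of the file, so a host-page-aligned range that passes
 * can_passthrough_madvise() is backed on the host the way the guest
 * expects and is expected to behave the same under host madvise().
 */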

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest. In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}