/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space. */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space. */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page. */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead. */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail. */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory. */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.
     */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary. */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF, aligned to the
               host's real page size.  Additional anonymous maps
               will be created beyond EOF. */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address.  It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, the host mmap()
         * handles this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h_untagged(start), len, offset) == -1)
                goto fail;
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len, page_flags);
        }
    }
the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
    }
    mmap_unlock();
    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

/*
 * Return true if the advice for [start, end) can be forwarded straight to
 * the host: the range must be host-page aligned and every target page in
 * it must be PAGE_PASSTHROUGH, i.e. backed 1:1 by the host mapping.
 */
static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest.  In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len);
            }
        }
    }
    mmap_unlock();

    return ret;
}