/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret;

    trace_target_mprotect(start, len, prot);

    if ((start & ~TARGET_PAGE_MASK) != 0)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid(start, len)) {
        return -TARGET_ENOMEM;
    }
    prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
    if (len == 0)
        return 0;

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h(host_start), host_end - host_start, prot);
        if (ret != 0)
            goto error;
    }
    page_set_flags(start, start + len, prot | PAGE_VALID);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}

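/*
 * The fragment handling below covers the case where TARGET_PAGE_SIZE is
 * smaller than qemu_host_page_size, so that a single host page can hold
 * several independently mapped target pages.  For example, with 64K host
 * pages and 4K target pages, mapping or protecting only part of a host
 * page must not clobber the protections or contents of the neighbouring
 * target pages that share it, because the host mmap()/mprotect() calls
 * can only operate on the whole 64K page.
 */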
/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h(start), 0, end - start);
        }
    }
    return 0;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

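/*
 * When no part of the guest address space has been pre-reserved
 * (reserved_va == 0), mmap_find_vma below probes for a free area by asking
 * the host for an anonymous PROT_NONE mapping at the candidate address and
 * checking that the address the host returned is usable, i.e. inside the
 * guest address range and suitably aligned.  The probe mapping is only a
 * placeholder; the caller is expected to replace it, e.g. with a MAP_FIXED
 * mmap().
 */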
/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

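/*
 * A rough outline of target_mmap() below: when the guest lets us pick the
 * address (no MAP_FIXED), a free host range is found with mmap_find_vma(),
 * reserved with an anonymous mapping, and the file (if any) is then mapped
 * over that reservation.  MAP_FIXED requests are instead decomposed into
 * partial host pages at either end plus a host-page-aligned middle.
 */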
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;

    mmap_lock();
    trace_target_mmap(start, len, prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF, aligned to the
               host's real page size. Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h(start), host_len, prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h(start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

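        /*
         * Below, the fixed-address mapping is put together piecewise.  If
         * the file offset and the guest start are not congruent modulo the
         * host page size, the file cannot be mapped directly at all, so its
         * contents are read into anonymous memory with pread() instead
         * (and, for a read-only mapping, re-protected afterwards).
         * Otherwise only the unaligned head and tail host pages go through
         * mmap_frag(); the aligned middle is a plain host mmap().
         */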
        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h(start), len, offset) == -1)
                goto fail;
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        log_page_dump(__func__);
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

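/*
 * Replace a range with a PROT_NONE placeholder instead of unmapping it.
 * This is used below when reserved_va is in effect: the pre-reserved guest
 * address space must stay reserved on the host, so freed ranges are
 * remapped as inaccessible anonymous memory rather than returned to the
 * host with munmap().  Host pages at either end that still contain live
 * target pages are left untouched.
 */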
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}

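/*
 * Emulation of the guest's mremap().  When reserved_va is in effect, any
 * guest range vacated by the host mremap() is turned back into a PROT_NONE
 * reservation with mmap_reserve() rather than being left unmapped.
 */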
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid(new_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h(old_addr), old_size, new_size,
                           flags, g2h(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED, g2h(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
            if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
                /* Re-reserve the range vacated by shrinking the mapping.  */
                mmap_reserve(old_addr + new_size, old_size - new_size);
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
        /* Check if address fits target address space */
        if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
            /* Revert mremap() changes */
            host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}