/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret;

    trace_target_mprotect(start, len, prot);

    if ((start & ~TARGET_PAGE_MASK) != 0)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid(start, len)) {
        return -TARGET_ENOMEM;
    }
    prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
    if (len == 0)
        return 0;

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0)
            goto error;
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h(host_start), host_end - host_start, prot);
        if (ret != 0)
            goto error;
    }
    page_set_flags(start, start + len, prot | PAGE_VALID);
    mmap_unlock();
    return 0;
error:
    mmap_unlock();
    return ret;
}

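/*
 * When the host page size is larger than the target page size, a single
 * host page can back several target pages, and a new target mapping may
 * cover only part of that host page (e.g. a 4K-page target running on a
 * 64K-page host).  The helper below maps just the [start, end) slice of
 * the host page beginning at real_start, while preserving the protections
 * that the other target pages within that host page already have.
 */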
/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h(start), 0, end - start);
        }
    }
    return 0;
}

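/*
 * Default base address for guest mappings that do not request a fixed
 * address.  mmap_next_start records where the last successful search by
 * mmap_find_vma() ended, so the next search does not start from scratch.
 */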
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
# define TASK_UNMAPPED_BASE  (1ul << 38)
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below.  */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

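/*
 * Emulate the guest's mmap(2).  'start', 'len' and 'offset' are guest
 * values; 'prot' and 'flags' use the host constants (see the NOTE below).
 * Returns the guest address of the new mapping, or -1 with errno set on
 * failure; the caller is expected to translate errno into a target error
 * code.
 */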
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;

    mmap_lock();
    trace_target_mmap(start, len, prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K.  But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */

    if ((qemu_real_host_page_size < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF, aligned to the
               host's real page size.  Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h(start), host_len, prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            goto fail;
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h(start), len, prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (!guest_range_valid(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h(start), len, offset) == -1)
                goto fail;
            if (!(prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start,
                            real_start + qemu_host_page_size,
                            prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h(real_start), real_end - real_start,
                     prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
        }
    }
 the_end1:
    page_set_flags(start, start + len, prot | PAGE_VALID);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        log_page_dump(__func__);
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

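/*
 * With a pre-reserved guest address space (reserved_va != 0) we must not
 * hand pages back to the host, or an unrelated host mapping could appear
 * inside the guest's range.  Instead of munmap(), the helper below covers
 * the host pages that no longer back any guest page with a fresh PROT_NONE
 * anonymous mapping, keeping the reservation intact.
 */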
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}

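/*
 * Emulate the guest's mremap(2).  Addresses and sizes are guest values;
 * on failure -1 is returned with errno set.  When the guest address space
 * is reserved, any range released by the resize is handed back to
 * mmap_reserve() rather than unmapped.
 */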
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid(new_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h(old_addr), old_size, new_size,
                           flags, g2h(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL.  */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED, g2h(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
            if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
                /* Keep the shrunk tail reserved rather than unmapped.  */
                mmap_reserve(old_addr + new_size, old_size - new_size);
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
        /* Check if address fits target address space */
        if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
            /* Revert mremap() changes */
            host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}