1 /* 2 * mmap support for qemu 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include <sys/shm.h> 21 #include "trace.h" 22 #include "exec/log.h" 23 #include "qemu.h" 24 #include "user-internals.h" 25 #include "user-mmap.h" 26 #include "target_mman.h" 27 #include "qemu/interval-tree.h" 28 29 #ifdef TARGET_ARM 30 #include "target/arm/cpu-features.h" 31 #endif 32 33 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; 34 static __thread int mmap_lock_count; 35 36 void mmap_lock(void) 37 { 38 if (mmap_lock_count++ == 0) { 39 pthread_mutex_lock(&mmap_mutex); 40 } 41 } 42 43 void mmap_unlock(void) 44 { 45 assert(mmap_lock_count > 0); 46 if (--mmap_lock_count == 0) { 47 pthread_mutex_unlock(&mmap_mutex); 48 } 49 } 50 51 bool have_mmap_lock(void) 52 { 53 return mmap_lock_count > 0 ? true : false; 54 } 55 56 /* Grab lock to make sure things are in a consistent state after fork(). */ 57 void mmap_fork_start(void) 58 { 59 if (mmap_lock_count) 60 abort(); 61 pthread_mutex_lock(&mmap_mutex); 62 } 63 64 void mmap_fork_end(int child) 65 { 66 if (child) { 67 pthread_mutex_init(&mmap_mutex, NULL); 68 } else { 69 pthread_mutex_unlock(&mmap_mutex); 70 } 71 } 72 73 /* Protected by mmap_lock. */ 74 static IntervalTreeRoot shm_regions; 75 76 static void shm_region_add(abi_ptr start, abi_ptr last) 77 { 78 IntervalTreeNode *i = g_new0(IntervalTreeNode, 1); 79 80 i->start = start; 81 i->last = last; 82 interval_tree_insert(i, &shm_regions); 83 } 84 85 static abi_ptr shm_region_find(abi_ptr start) 86 { 87 IntervalTreeNode *i; 88 89 for (i = interval_tree_iter_first(&shm_regions, start, start); i; 90 i = interval_tree_iter_next(i, start, start)) { 91 if (i->start == start) { 92 return i->last; 93 } 94 } 95 return 0; 96 } 97 98 static void shm_region_rm_complete(abi_ptr start, abi_ptr last) 99 { 100 IntervalTreeNode *i, *n; 101 102 for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) { 103 n = interval_tree_iter_next(i, start, last); 104 if (i->start >= start && i->last <= last) { 105 interval_tree_remove(i, &shm_regions); 106 g_free(i); 107 } 108 } 109 } 110 111 /* 112 * Validate target prot bitmask. 113 * Return the prot bitmask for the host in *HOST_PROT. 114 * Return 0 if the target prot bitmask is invalid, otherwise 115 * the internal qemu page_flags (which will include PAGE_VALID). 116 */ 117 static int validate_prot_to_pageflags(int prot) 118 { 119 int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM; 120 int page_flags = (prot & PAGE_BITS) | PAGE_VALID; 121 122 #ifdef TARGET_AARCH64 123 { 124 ARMCPU *cpu = ARM_CPU(thread_cpu); 125 126 /* 127 * The PROT_BTI bit is only accepted if the cpu supports the feature. 128 * Since this is the unusual case, don't bother checking unless 129 * the bit has been requested. If set and valid, record the bit 130 * within QEMU's page_flags. 131 */ 132 if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) { 133 valid |= TARGET_PROT_BTI; 134 page_flags |= PAGE_BTI; 135 } 136 /* Similarly for the PROT_MTE bit. */ 137 if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) { 138 valid |= TARGET_PROT_MTE; 139 page_flags |= PAGE_MTE; 140 } 141 } 142 #elif defined(TARGET_HPPA) 143 valid |= PROT_GROWSDOWN | PROT_GROWSUP; 144 #endif 145 146 return prot & ~valid ? 0 : page_flags; 147 } 148 149 /* 150 * For the host, we need not pass anything except read/write/exec. 151 * While PROT_SEM is allowed by all hosts, it is also ignored, so 152 * don't bother transforming guest bit to host bit. Any other 153 * target-specific prot bits will not be understood by the host 154 * and will need to be encoded into page_flags for qemu emulation. 155 * 156 * Pages that are executable by the guest will never be executed 157 * by the host, but the host will need to be able to read them. 158 */ 159 static int target_to_host_prot(int prot) 160 { 161 return (prot & (PROT_READ | PROT_WRITE)) | 162 (prot & PROT_EXEC ? PROT_READ : 0); 163 } 164 165 /* NOTE: all the constants are the HOST ones, but addresses are target. */ 166 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) 167 { 168 int host_page_size = qemu_real_host_page_size(); 169 abi_ulong starts[3]; 170 abi_ulong lens[3]; 171 int prots[3]; 172 abi_ulong host_start, host_last, last; 173 int prot1, ret, page_flags, nranges; 174 175 trace_target_mprotect(start, len, target_prot); 176 177 if ((start & ~TARGET_PAGE_MASK) != 0) { 178 return -TARGET_EINVAL; 179 } 180 page_flags = validate_prot_to_pageflags(target_prot); 181 if (!page_flags) { 182 return -TARGET_EINVAL; 183 } 184 if (len == 0) { 185 return 0; 186 } 187 len = TARGET_PAGE_ALIGN(len); 188 if (!guest_range_valid_untagged(start, len)) { 189 return -TARGET_ENOMEM; 190 } 191 192 last = start + len - 1; 193 host_start = start & -host_page_size; 194 host_last = ROUND_UP(last, host_page_size) - 1; 195 nranges = 0; 196 197 mmap_lock(); 198 199 if (host_last - host_start < host_page_size) { 200 /* Single host page contains all guest pages: sum the prot. */ 201 prot1 = target_prot; 202 for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) { 203 prot1 |= page_get_flags(a); 204 } 205 for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) { 206 prot1 |= page_get_flags(a + 1); 207 } 208 starts[nranges] = host_start; 209 lens[nranges] = host_page_size; 210 prots[nranges] = prot1; 211 nranges++; 212 } else { 213 if (host_start < start) { 214 /* Host page contains more than one guest page: sum the prot. */ 215 prot1 = target_prot; 216 for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) { 217 prot1 |= page_get_flags(a); 218 } 219 /* If the resulting sum differs, create a new range. */ 220 if (prot1 != target_prot) { 221 starts[nranges] = host_start; 222 lens[nranges] = host_page_size; 223 prots[nranges] = prot1; 224 nranges++; 225 host_start += host_page_size; 226 } 227 } 228 229 if (last < host_last) { 230 /* Host page contains more than one guest page: sum the prot. */ 231 prot1 = target_prot; 232 for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) { 233 prot1 |= page_get_flags(a + 1); 234 } 235 /* If the resulting sum differs, create a new range. */ 236 if (prot1 != target_prot) { 237 host_last -= host_page_size; 238 starts[nranges] = host_last + 1; 239 lens[nranges] = host_page_size; 240 prots[nranges] = prot1; 241 nranges++; 242 } 243 } 244 245 /* Create a range for the middle, if any remains. */ 246 if (host_start < host_last) { 247 starts[nranges] = host_start; 248 lens[nranges] = host_last - host_start + 1; 249 prots[nranges] = target_prot; 250 nranges++; 251 } 252 } 253 254 for (int i = 0; i < nranges; ++i) { 255 ret = mprotect(g2h_untagged(starts[i]), lens[i], 256 target_to_host_prot(prots[i])); 257 if (ret != 0) { 258 goto error; 259 } 260 } 261 262 page_set_flags(start, last, page_flags); 263 ret = 0; 264 265 error: 266 mmap_unlock(); 267 return ret; 268 } 269 270 /* 271 * Perform munmap on behalf of the target, with host parameters. 272 * If reserved_va, we must replace the memory reservation. 273 */ 274 static int do_munmap(void *addr, size_t len) 275 { 276 if (reserved_va) { 277 void *ptr = mmap(addr, len, PROT_NONE, 278 MAP_FIXED | MAP_ANONYMOUS 279 | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 280 return ptr == addr ? 0 : -1; 281 } 282 return munmap(addr, len); 283 } 284 285 /* 286 * Map an incomplete host page. 287 * 288 * Here be dragons. This case will not work if there is an existing 289 * overlapping host page, which is file mapped, and for which the mapping 290 * is beyond the end of the file. In that case, we will see SIGBUS when 291 * trying to write a portion of this page. 292 * 293 * FIXME: Work around this with a temporary signal handler and longjmp. 294 */ 295 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, 296 int prot, int flags, int fd, off_t offset) 297 { 298 int host_page_size = qemu_real_host_page_size(); 299 abi_ulong real_last; 300 void *host_start; 301 int prot_old, prot_new; 302 int host_prot_old, host_prot_new; 303 304 if (!(flags & MAP_ANONYMOUS) 305 && (flags & MAP_TYPE) == MAP_SHARED 306 && (prot & PROT_WRITE)) { 307 /* 308 * msync() won't work with the partial page, so we return an 309 * error if write is possible while it is a shared mapping. 310 */ 311 errno = EINVAL; 312 return false; 313 } 314 315 real_last = real_start + host_page_size - 1; 316 host_start = g2h_untagged(real_start); 317 318 /* Get the protection of the target pages outside the mapping. */ 319 prot_old = 0; 320 for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) { 321 prot_old |= page_get_flags(a); 322 } 323 for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) { 324 prot_old |= page_get_flags(a); 325 } 326 327 if (prot_old == 0) { 328 /* 329 * Since !(prot_old & PAGE_VALID), there were no guest pages 330 * outside of the fragment we need to map. Allocate a new host 331 * page to cover, discarding whatever else may have been present. 332 */ 333 void *p = mmap(host_start, host_page_size, 334 target_to_host_prot(prot), 335 flags | MAP_ANONYMOUS, -1, 0); 336 if (p != host_start) { 337 if (p != MAP_FAILED) { 338 do_munmap(p, host_page_size); 339 errno = EEXIST; 340 } 341 return false; 342 } 343 prot_old = prot; 344 } 345 prot_new = prot | prot_old; 346 347 host_prot_old = target_to_host_prot(prot_old); 348 host_prot_new = target_to_host_prot(prot_new); 349 350 /* Adjust protection to be able to write. */ 351 if (!(host_prot_old & PROT_WRITE)) { 352 host_prot_old |= PROT_WRITE; 353 mprotect(host_start, host_page_size, host_prot_old); 354 } 355 356 /* Read or zero the new guest pages. */ 357 if (flags & MAP_ANONYMOUS) { 358 memset(g2h_untagged(start), 0, last - start + 1); 359 } else { 360 if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) { 361 return false; 362 } 363 } 364 365 /* Put final protection */ 366 if (host_prot_new != host_prot_old) { 367 mprotect(host_start, host_page_size, host_prot_new); 368 } 369 return true; 370 } 371 372 abi_ulong task_unmapped_base; 373 abi_ulong elf_et_dyn_base; 374 abi_ulong mmap_next_start; 375 376 /* 377 * Subroutine of mmap_find_vma, used when we have pre-allocated 378 * a chunk of guest address space. 379 */ 380 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, 381 abi_ulong align) 382 { 383 target_ulong ret; 384 385 ret = page_find_range_empty(start, reserved_va, size, align); 386 if (ret == -1 && start > mmap_min_addr) { 387 /* Restart at the beginning of the address space. */ 388 ret = page_find_range_empty(mmap_min_addr, start - 1, size, align); 389 } 390 391 return ret; 392 } 393 394 /* 395 * Find and reserve a free memory area of size 'size'. The search 396 * starts at 'start'. 397 * It must be called with mmap_lock() held. 398 * Return -1 if error. 399 */ 400 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) 401 { 402 int host_page_size = qemu_real_host_page_size(); 403 void *ptr, *prev; 404 abi_ulong addr; 405 int wrapped, repeat; 406 407 align = MAX(align, host_page_size); 408 409 /* If 'start' == 0, then a default start address is used. */ 410 if (start == 0) { 411 start = mmap_next_start; 412 } else { 413 start &= -host_page_size; 414 } 415 start = ROUND_UP(start, align); 416 size = ROUND_UP(size, host_page_size); 417 418 if (reserved_va) { 419 return mmap_find_vma_reserved(start, size, align); 420 } 421 422 addr = start; 423 wrapped = repeat = 0; 424 prev = 0; 425 426 for (;; prev = ptr) { 427 /* 428 * Reserve needed memory area to avoid a race. 429 * It should be discarded using: 430 * - mmap() with MAP_FIXED flag 431 * - mremap() with MREMAP_FIXED flag 432 * - shmat() with SHM_REMAP flag 433 */ 434 ptr = mmap(g2h_untagged(addr), size, PROT_NONE, 435 MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 436 437 /* ENOMEM, if host address space has no memory */ 438 if (ptr == MAP_FAILED) { 439 return (abi_ulong)-1; 440 } 441 442 /* 443 * Count the number of sequential returns of the same address. 444 * This is used to modify the search algorithm below. 445 */ 446 repeat = (ptr == prev ? repeat + 1 : 0); 447 448 if (h2g_valid(ptr + size - 1)) { 449 addr = h2g(ptr); 450 451 if ((addr & (align - 1)) == 0) { 452 /* Success. */ 453 if (start == mmap_next_start && addr >= task_unmapped_base) { 454 mmap_next_start = addr + size; 455 } 456 return addr; 457 } 458 459 /* The address is not properly aligned for the target. */ 460 switch (repeat) { 461 case 0: 462 /* 463 * Assume the result that the kernel gave us is the 464 * first with enough free space, so start again at the 465 * next higher target page. 466 */ 467 addr = ROUND_UP(addr, align); 468 break; 469 case 1: 470 /* 471 * Sometimes the kernel decides to perform the allocation 472 * at the top end of memory instead. 473 */ 474 addr &= -align; 475 break; 476 case 2: 477 /* Start over at low memory. */ 478 addr = 0; 479 break; 480 default: 481 /* Fail. This unaligned block must the last. */ 482 addr = -1; 483 break; 484 } 485 } else { 486 /* 487 * Since the result the kernel gave didn't fit, start 488 * again at low memory. If any repetition, fail. 489 */ 490 addr = (repeat ? -1 : 0); 491 } 492 493 /* Unmap and try again. */ 494 munmap(ptr, size); 495 496 /* ENOMEM if we checked the whole of the target address space. */ 497 if (addr == (abi_ulong)-1) { 498 return (abi_ulong)-1; 499 } else if (addr == 0) { 500 if (wrapped) { 501 return (abi_ulong)-1; 502 } 503 wrapped = 1; 504 /* 505 * Don't actually use 0 when wrapping, instead indicate 506 * that we'd truly like an allocation in low memory. 507 */ 508 addr = (mmap_min_addr > TARGET_PAGE_SIZE 509 ? TARGET_PAGE_ALIGN(mmap_min_addr) 510 : TARGET_PAGE_SIZE); 511 } else if (wrapped && addr >= start) { 512 return (abi_ulong)-1; 513 } 514 } 515 } 516 517 /* 518 * Record a successful mmap within the user-exec interval tree. 519 */ 520 static abi_long mmap_end(abi_ulong start, abi_ulong last, 521 abi_ulong passthrough_start, 522 abi_ulong passthrough_last, 523 int flags, int page_flags) 524 { 525 if (flags & MAP_ANONYMOUS) { 526 page_flags |= PAGE_ANON; 527 } 528 page_flags |= PAGE_RESET; 529 if (passthrough_start > passthrough_last) { 530 page_set_flags(start, last, page_flags); 531 } else { 532 if (start < passthrough_start) { 533 page_set_flags(start, passthrough_start - 1, page_flags); 534 } 535 page_set_flags(passthrough_start, passthrough_last, 536 page_flags | PAGE_PASSTHROUGH); 537 if (passthrough_last < last) { 538 page_set_flags(passthrough_last + 1, last, page_flags); 539 } 540 } 541 shm_region_rm_complete(start, last); 542 trace_target_mmap_complete(start); 543 if (qemu_loglevel_mask(CPU_LOG_PAGE)) { 544 FILE *f = qemu_log_trylock(); 545 if (f) { 546 fprintf(f, "page layout changed following mmap\n"); 547 page_dump(f); 548 qemu_log_unlock(f); 549 } 550 } 551 return start; 552 } 553 554 /* 555 * Special case host page size == target page size, 556 * where there are no edge conditions. 557 */ 558 static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len, 559 int host_prot, int flags, int page_flags, 560 int fd, off_t offset) 561 { 562 void *p, *want_p = g2h_untagged(start); 563 abi_ulong last; 564 565 p = mmap(want_p, len, host_prot, flags, fd, offset); 566 if (p == MAP_FAILED) { 567 return -1; 568 } 569 /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */ 570 if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) { 571 do_munmap(p, len); 572 errno = EEXIST; 573 return -1; 574 } 575 576 start = h2g(p); 577 last = start + len - 1; 578 return mmap_end(start, last, start, last, flags, page_flags); 579 } 580 581 /* 582 * Special case host page size < target page size. 583 * 584 * The two special cases are increased guest alignment, and mapping 585 * past the end of a file. 586 * 587 * When mapping files into a memory area larger than the file, 588 * accesses to pages beyond the file size will cause a SIGBUS. 589 * 590 * For example, if mmaping a file of 100 bytes on a host with 4K 591 * pages emulating a target with 8K pages, the target expects to 592 * be able to access the first 8K. But the host will trap us on 593 * any access beyond 4K. 594 * 595 * When emulating a target with a larger page-size than the hosts, 596 * we may need to truncate file maps at EOF and add extra anonymous 597 * pages up to the targets page boundary. 598 * 599 * This workaround only works for files that do not change. 600 * If the file is later extended (e.g. ftruncate), the SIGBUS 601 * vanishes and the proper behaviour is that changes within the 602 * anon page should be reflected in the file. 603 * 604 * However, this case is rather common with executable images, 605 * so the workaround is important for even trivial tests, whereas 606 * the mmap of of a file being extended is less common. 607 */ 608 static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot, 609 int mmap_flags, int page_flags, int fd, 610 off_t offset, int host_page_size) 611 { 612 void *p, *want_p = g2h_untagged(start); 613 off_t fileend_adj = 0; 614 int flags = mmap_flags; 615 abi_ulong last, pass_last; 616 617 if (!(flags & MAP_ANONYMOUS)) { 618 struct stat sb; 619 620 if (fstat(fd, &sb) == -1) { 621 return -1; 622 } 623 if (offset >= sb.st_size) { 624 /* 625 * The entire map is beyond the end of the file. 626 * Transform it to an anonymous mapping. 627 */ 628 flags |= MAP_ANONYMOUS; 629 fd = -1; 630 offset = 0; 631 } else if (offset + len > sb.st_size) { 632 /* 633 * A portion of the map is beyond the end of the file. 634 * Truncate the file portion of the allocation. 635 */ 636 fileend_adj = offset + len - sb.st_size; 637 } 638 } 639 640 if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { 641 if (fileend_adj) { 642 p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0); 643 } else { 644 p = mmap(want_p, len, host_prot, flags, fd, offset); 645 } 646 if (p != want_p) { 647 if (p != MAP_FAILED) { 648 /* Host does not support MAP_FIXED_NOREPLACE: emulate. */ 649 do_munmap(p, len); 650 errno = EEXIST; 651 } 652 return -1; 653 } 654 655 if (fileend_adj) { 656 void *t = mmap(p, len - fileend_adj, host_prot, 657 (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, 658 fd, offset); 659 660 if (t == MAP_FAILED) { 661 int save_errno = errno; 662 663 /* 664 * We failed a map over the top of the successful anonymous 665 * mapping above. The only failure mode is running out of VMAs, 666 * and there's nothing that we can do to detect that earlier. 667 * If we have replaced an existing mapping with MAP_FIXED, 668 * then we cannot properly recover. It's a coin toss whether 669 * it would be better to exit or continue here. 670 */ 671 if (!(flags & MAP_FIXED_NOREPLACE) && 672 !page_check_range_empty(start, start + len - 1)) { 673 qemu_log("QEMU target_mmap late failure: %s", 674 strerror(save_errno)); 675 } 676 677 do_munmap(want_p, len); 678 errno = save_errno; 679 return -1; 680 } 681 } 682 } else { 683 size_t host_len, part_len; 684 685 /* 686 * Take care to align the host memory. Perform a larger anonymous 687 * allocation and extract the aligned portion. Remap the file on 688 * top of that. 689 */ 690 host_len = len + TARGET_PAGE_SIZE - host_page_size; 691 p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0); 692 if (p == MAP_FAILED) { 693 return -1; 694 } 695 696 part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1); 697 if (part_len) { 698 part_len = TARGET_PAGE_SIZE - part_len; 699 do_munmap(p, part_len); 700 p += part_len; 701 host_len -= part_len; 702 } 703 if (len < host_len) { 704 do_munmap(p + len, host_len - len); 705 } 706 707 if (!(flags & MAP_ANONYMOUS)) { 708 void *t = mmap(p, len - fileend_adj, host_prot, 709 flags | MAP_FIXED, fd, offset); 710 711 if (t == MAP_FAILED) { 712 int save_errno = errno; 713 do_munmap(p, len); 714 errno = save_errno; 715 return -1; 716 } 717 } 718 719 start = h2g(p); 720 } 721 722 last = start + len - 1; 723 if (fileend_adj) { 724 pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1; 725 } else { 726 pass_last = last; 727 } 728 return mmap_end(start, last, start, pass_last, mmap_flags, page_flags); 729 } 730 731 /* 732 * Special case host page size > target page size. 733 * 734 * The two special cases are address and file offsets that are valid 735 * for the guest that cannot be directly represented by the host. 736 */ 737 static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len, 738 int target_prot, int host_prot, 739 int flags, int page_flags, int fd, 740 off_t offset, int host_page_size) 741 { 742 void *p, *want_p = g2h_untagged(start); 743 off_t host_offset = offset & -host_page_size; 744 abi_ulong last, real_start, real_last; 745 bool misaligned_offset = false; 746 size_t host_len; 747 748 if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { 749 /* 750 * Adjust the offset to something representable on the host. 751 */ 752 host_len = len + offset - host_offset; 753 p = mmap(want_p, host_len, host_prot, flags, fd, host_offset); 754 if (p == MAP_FAILED) { 755 return -1; 756 } 757 758 /* Update start to the file position at offset. */ 759 p += offset - host_offset; 760 761 start = h2g(p); 762 last = start + len - 1; 763 return mmap_end(start, last, start, last, flags, page_flags); 764 } 765 766 if (!(flags & MAP_ANONYMOUS)) { 767 misaligned_offset = (start ^ offset) & (host_page_size - 1); 768 769 /* 770 * The fallback for misalignment is a private mapping + read. 771 * This carries none of semantics required of MAP_SHARED. 772 */ 773 if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) { 774 errno = EINVAL; 775 return -1; 776 } 777 } 778 779 last = start + len - 1; 780 real_start = start & -host_page_size; 781 real_last = ROUND_UP(last, host_page_size) - 1; 782 783 /* 784 * Handle the start and end of the mapping. 785 */ 786 if (real_start < start) { 787 abi_ulong real_page_last = real_start + host_page_size - 1; 788 if (last <= real_page_last) { 789 /* Entire allocation a subset of one host page. */ 790 if (!mmap_frag(real_start, start, last, target_prot, 791 flags, fd, offset)) { 792 return -1; 793 } 794 return mmap_end(start, last, -1, 0, flags, page_flags); 795 } 796 797 if (!mmap_frag(real_start, start, real_page_last, target_prot, 798 flags, fd, offset)) { 799 return -1; 800 } 801 real_start = real_page_last + 1; 802 } 803 804 if (last < real_last) { 805 abi_ulong real_page_start = real_last - host_page_size + 1; 806 if (!mmap_frag(real_page_start, real_page_start, last, 807 target_prot, flags, fd, 808 offset + real_page_start - start)) { 809 return -1; 810 } 811 real_last = real_page_start - 1; 812 } 813 814 if (real_start > real_last) { 815 return mmap_end(start, last, -1, 0, flags, page_flags); 816 } 817 818 /* 819 * Handle the middle of the mapping. 820 */ 821 822 host_len = real_last - real_start + 1; 823 want_p += real_start - start; 824 825 if (flags & MAP_ANONYMOUS) { 826 p = mmap(want_p, host_len, host_prot, flags, -1, 0); 827 } else if (!misaligned_offset) { 828 p = mmap(want_p, host_len, host_prot, flags, fd, 829 offset + real_start - start); 830 } else { 831 p = mmap(want_p, host_len, host_prot | PROT_WRITE, 832 flags | MAP_ANONYMOUS, -1, 0); 833 } 834 if (p != want_p) { 835 if (p != MAP_FAILED) { 836 do_munmap(p, host_len); 837 errno = EEXIST; 838 } 839 return -1; 840 } 841 842 if (misaligned_offset) { 843 /* TODO: The read could be short. */ 844 if (pread(fd, p, host_len, offset + real_start - start) != host_len) { 845 do_munmap(p, host_len); 846 return -1; 847 } 848 if (!(host_prot & PROT_WRITE)) { 849 mprotect(p, host_len, host_prot); 850 } 851 } 852 853 return mmap_end(start, last, -1, 0, flags, page_flags); 854 } 855 856 static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, 857 int target_prot, int flags, int page_flags, 858 int fd, off_t offset) 859 { 860 int host_page_size = qemu_real_host_page_size(); 861 int host_prot; 862 863 /* 864 * For reserved_va, we are in full control of the allocation. 865 * Find a suitable hole and convert to MAP_FIXED. 866 */ 867 if (reserved_va) { 868 if (flags & MAP_FIXED_NOREPLACE) { 869 /* Validate that the chosen range is empty. */ 870 if (!page_check_range_empty(start, start + len - 1)) { 871 errno = EEXIST; 872 return -1; 873 } 874 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; 875 } else if (!(flags & MAP_FIXED)) { 876 abi_ulong real_start = start & -host_page_size; 877 off_t host_offset = offset & -host_page_size; 878 size_t real_len = len + offset - host_offset; 879 abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE); 880 881 start = mmap_find_vma(real_start, real_len, align); 882 if (start == (abi_ulong)-1) { 883 errno = ENOMEM; 884 return -1; 885 } 886 start += offset - host_offset; 887 flags |= MAP_FIXED; 888 } 889 } 890 891 host_prot = target_to_host_prot(target_prot); 892 893 if (host_page_size == TARGET_PAGE_SIZE) { 894 return mmap_h_eq_g(start, len, host_prot, flags, 895 page_flags, fd, offset); 896 } else if (host_page_size < TARGET_PAGE_SIZE) { 897 return mmap_h_lt_g(start, len, host_prot, flags, 898 page_flags, fd, offset, host_page_size); 899 } else { 900 return mmap_h_gt_g(start, len, target_prot, host_prot, flags, 901 page_flags, fd, offset, host_page_size); 902 } 903 } 904 905 /* NOTE: all the constants are the HOST ones */ 906 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, 907 int flags, int fd, off_t offset) 908 { 909 abi_long ret; 910 int page_flags; 911 912 trace_target_mmap(start, len, target_prot, flags, fd, offset); 913 914 if (!len) { 915 errno = EINVAL; 916 return -1; 917 } 918 919 page_flags = validate_prot_to_pageflags(target_prot); 920 if (!page_flags) { 921 errno = EINVAL; 922 return -1; 923 } 924 925 /* Also check for overflows... */ 926 len = TARGET_PAGE_ALIGN(len); 927 if (!len || len != (size_t)len) { 928 errno = ENOMEM; 929 return -1; 930 } 931 932 if (offset & ~TARGET_PAGE_MASK) { 933 errno = EINVAL; 934 return -1; 935 } 936 if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { 937 if (start & ~TARGET_PAGE_MASK) { 938 errno = EINVAL; 939 return -1; 940 } 941 if (!guest_range_valid_untagged(start, len)) { 942 errno = ENOMEM; 943 return -1; 944 } 945 } 946 947 mmap_lock(); 948 949 ret = target_mmap__locked(start, len, target_prot, flags, 950 page_flags, fd, offset); 951 952 mmap_unlock(); 953 954 /* 955 * If we're mapping shared memory, ensure we generate code for parallel 956 * execution and flush old translations. This will work up to the level 957 * supported by the host -- anything that requires EXCP_ATOMIC will not 958 * be atomic with respect to an external process. 959 */ 960 if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) { 961 CPUState *cpu = thread_cpu; 962 if (!(cpu->tcg_cflags & CF_PARALLEL)) { 963 cpu->tcg_cflags |= CF_PARALLEL; 964 tb_flush(cpu); 965 } 966 } 967 968 return ret; 969 } 970 971 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) 972 { 973 int host_page_size = qemu_real_host_page_size(); 974 abi_ulong real_start; 975 abi_ulong real_last; 976 abi_ulong real_len; 977 abi_ulong last; 978 abi_ulong a; 979 void *host_start; 980 int prot; 981 982 last = start + len - 1; 983 real_start = start & -host_page_size; 984 real_last = ROUND_UP(last, host_page_size) - 1; 985 986 /* 987 * If guest pages remain on the first or last host pages, 988 * adjust the deallocation to retain those guest pages. 989 * The single page special case is required for the last page, 990 * lest real_start overflow to zero. 991 */ 992 if (real_last - real_start < host_page_size) { 993 prot = 0; 994 for (a = real_start; a < start; a += TARGET_PAGE_SIZE) { 995 prot |= page_get_flags(a); 996 } 997 for (a = last; a < real_last; a += TARGET_PAGE_SIZE) { 998 prot |= page_get_flags(a + 1); 999 } 1000 if (prot != 0) { 1001 return 0; 1002 } 1003 } else { 1004 for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) { 1005 prot |= page_get_flags(a); 1006 } 1007 if (prot != 0) { 1008 real_start += host_page_size; 1009 } 1010 1011 for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) { 1012 prot |= page_get_flags(a + 1); 1013 } 1014 if (prot != 0) { 1015 real_last -= host_page_size; 1016 } 1017 1018 if (real_last < real_start) { 1019 return 0; 1020 } 1021 } 1022 1023 real_len = real_last - real_start + 1; 1024 host_start = g2h_untagged(real_start); 1025 1026 return do_munmap(host_start, real_len); 1027 } 1028 1029 int target_munmap(abi_ulong start, abi_ulong len) 1030 { 1031 int ret; 1032 1033 trace_target_munmap(start, len); 1034 1035 if (start & ~TARGET_PAGE_MASK) { 1036 errno = EINVAL; 1037 return -1; 1038 } 1039 len = TARGET_PAGE_ALIGN(len); 1040 if (len == 0 || !guest_range_valid_untagged(start, len)) { 1041 errno = EINVAL; 1042 return -1; 1043 } 1044 1045 mmap_lock(); 1046 ret = mmap_reserve_or_unmap(start, len); 1047 if (likely(ret == 0)) { 1048 page_set_flags(start, start + len - 1, 0); 1049 shm_region_rm_complete(start, start + len - 1); 1050 } 1051 mmap_unlock(); 1052 1053 return ret; 1054 } 1055 1056 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, 1057 abi_ulong new_size, unsigned long flags, 1058 abi_ulong new_addr) 1059 { 1060 int prot; 1061 void *host_addr; 1062 1063 if (!guest_range_valid_untagged(old_addr, old_size) || 1064 ((flags & MREMAP_FIXED) && 1065 !guest_range_valid_untagged(new_addr, new_size)) || 1066 ((flags & MREMAP_MAYMOVE) == 0 && 1067 !guest_range_valid_untagged(old_addr, new_size))) { 1068 errno = ENOMEM; 1069 return -1; 1070 } 1071 1072 mmap_lock(); 1073 1074 if (flags & MREMAP_FIXED) { 1075 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size, 1076 flags, g2h_untagged(new_addr)); 1077 1078 if (reserved_va && host_addr != MAP_FAILED) { 1079 /* 1080 * If new and old addresses overlap then the above mremap will 1081 * already have failed with EINVAL. 1082 */ 1083 mmap_reserve_or_unmap(old_addr, old_size); 1084 } 1085 } else if (flags & MREMAP_MAYMOVE) { 1086 abi_ulong mmap_start; 1087 1088 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE); 1089 1090 if (mmap_start == -1) { 1091 errno = ENOMEM; 1092 host_addr = MAP_FAILED; 1093 } else { 1094 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size, 1095 flags | MREMAP_FIXED, 1096 g2h_untagged(mmap_start)); 1097 if (reserved_va) { 1098 mmap_reserve_or_unmap(old_addr, old_size); 1099 } 1100 } 1101 } else { 1102 int page_flags = 0; 1103 if (reserved_va && old_size < new_size) { 1104 abi_ulong addr; 1105 for (addr = old_addr + old_size; 1106 addr < old_addr + new_size; 1107 addr++) { 1108 page_flags |= page_get_flags(addr); 1109 } 1110 } 1111 if (page_flags == 0) { 1112 host_addr = mremap(g2h_untagged(old_addr), 1113 old_size, new_size, flags); 1114 1115 if (host_addr != MAP_FAILED) { 1116 /* Check if address fits target address space */ 1117 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) { 1118 /* Revert mremap() changes */ 1119 host_addr = mremap(g2h_untagged(old_addr), 1120 new_size, old_size, flags); 1121 errno = ENOMEM; 1122 host_addr = MAP_FAILED; 1123 } else if (reserved_va && old_size > new_size) { 1124 mmap_reserve_or_unmap(old_addr + old_size, 1125 old_size - new_size); 1126 } 1127 } 1128 } else { 1129 errno = ENOMEM; 1130 host_addr = MAP_FAILED; 1131 } 1132 } 1133 1134 if (host_addr == MAP_FAILED) { 1135 new_addr = -1; 1136 } else { 1137 new_addr = h2g(host_addr); 1138 prot = page_get_flags(old_addr); 1139 page_set_flags(old_addr, old_addr + old_size - 1, 0); 1140 shm_region_rm_complete(old_addr, old_addr + old_size - 1); 1141 page_set_flags(new_addr, new_addr + new_size - 1, 1142 prot | PAGE_VALID | PAGE_RESET); 1143 shm_region_rm_complete(new_addr, new_addr + new_size - 1); 1144 } 1145 mmap_unlock(); 1146 return new_addr; 1147 } 1148 1149 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice) 1150 { 1151 abi_ulong len; 1152 int ret = 0; 1153 1154 if (start & ~TARGET_PAGE_MASK) { 1155 return -TARGET_EINVAL; 1156 } 1157 if (len_in == 0) { 1158 return 0; 1159 } 1160 len = TARGET_PAGE_ALIGN(len_in); 1161 if (len == 0 || !guest_range_valid_untagged(start, len)) { 1162 return -TARGET_EINVAL; 1163 } 1164 1165 /* Translate for some architectures which have different MADV_xxx values */ 1166 switch (advice) { 1167 case TARGET_MADV_DONTNEED: /* alpha */ 1168 advice = MADV_DONTNEED; 1169 break; 1170 case TARGET_MADV_WIPEONFORK: /* parisc */ 1171 advice = MADV_WIPEONFORK; 1172 break; 1173 case TARGET_MADV_KEEPONFORK: /* parisc */ 1174 advice = MADV_KEEPONFORK; 1175 break; 1176 /* we do not care about the other MADV_xxx values yet */ 1177 } 1178 1179 /* 1180 * Most advice values are hints, so ignoring and returning success is ok. 1181 * 1182 * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and 1183 * MADV_KEEPONFORK are not hints and need to be emulated. 1184 * 1185 * A straight passthrough for those may not be safe because qemu sometimes 1186 * turns private file-backed mappings into anonymous mappings. 1187 * If all guest pages have PAGE_PASSTHROUGH set, mappings have the 1188 * same semantics for the host as for the guest. 1189 * 1190 * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and 1191 * return failure if not. 1192 * 1193 * MADV_DONTNEED is passed through as well, if possible. 1194 * If passthrough isn't possible, we nevertheless (wrongly!) return 1195 * success, which is broken but some userspace programs fail to work 1196 * otherwise. Completely implementing such emulation is quite complicated 1197 * though. 1198 */ 1199 mmap_lock(); 1200 switch (advice) { 1201 case MADV_WIPEONFORK: 1202 case MADV_KEEPONFORK: 1203 ret = -EINVAL; 1204 /* fall through */ 1205 case MADV_DONTNEED: 1206 if (page_check_range(start, len, PAGE_PASSTHROUGH)) { 1207 ret = get_errno(madvise(g2h_untagged(start), len, advice)); 1208 if ((advice == MADV_DONTNEED) && (ret == 0)) { 1209 page_reset_target_data(start, start + len - 1); 1210 } 1211 } 1212 } 1213 mmap_unlock(); 1214 1215 return ret; 1216 } 1217 1218 #ifndef TARGET_FORCE_SHMLBA 1219 /* 1220 * For most architectures, SHMLBA is the same as the page size; 1221 * some architectures have larger values, in which case they should 1222 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function. 1223 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA 1224 * and defining its own value for SHMLBA. 1225 * 1226 * The kernel also permits SHMLBA to be set by the architecture to a 1227 * value larger than the page size without setting __ARCH_FORCE_SHMLBA; 1228 * this means that addresses are rounded to the large size if 1229 * SHM_RND is set but addresses not aligned to that size are not rejected 1230 * as long as they are at least page-aligned. Since the only architecture 1231 * which uses this is ia64 this code doesn't provide for that oddity. 1232 */ 1233 static inline abi_ulong target_shmlba(CPUArchState *cpu_env) 1234 { 1235 return TARGET_PAGE_SIZE; 1236 } 1237 #endif 1238 1239 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid, 1240 abi_ulong shmaddr, int shmflg) 1241 { 1242 CPUState *cpu = env_cpu(cpu_env); 1243 abi_ulong raddr; 1244 struct shmid_ds shm_info; 1245 int ret; 1246 abi_ulong shmlba; 1247 1248 /* shmat pointers are always untagged */ 1249 1250 /* find out the length of the shared memory segment */ 1251 ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); 1252 if (is_error(ret)) { 1253 /* can't get length, bail out */ 1254 return ret; 1255 } 1256 1257 shmlba = target_shmlba(cpu_env); 1258 1259 if (shmaddr & (shmlba - 1)) { 1260 if (shmflg & SHM_RND) { 1261 shmaddr &= ~(shmlba - 1); 1262 } else { 1263 return -TARGET_EINVAL; 1264 } 1265 } 1266 if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) { 1267 return -TARGET_EINVAL; 1268 } 1269 1270 WITH_MMAP_LOCK_GUARD() { 1271 void *host_raddr; 1272 abi_ulong last; 1273 1274 if (shmaddr) { 1275 host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); 1276 } else { 1277 abi_ulong mmap_start; 1278 1279 /* In order to use the host shmat, we need to honor host SHMLBA. */ 1280 mmap_start = mmap_find_vma(0, shm_info.shm_segsz, 1281 MAX(SHMLBA, shmlba)); 1282 1283 if (mmap_start == -1) { 1284 return -TARGET_ENOMEM; 1285 } 1286 host_raddr = shmat(shmid, g2h_untagged(mmap_start), 1287 shmflg | SHM_REMAP); 1288 } 1289 1290 if (host_raddr == (void *)-1) { 1291 return get_errno(-1); 1292 } 1293 raddr = h2g(host_raddr); 1294 last = raddr + shm_info.shm_segsz - 1; 1295 1296 page_set_flags(raddr, last, 1297 PAGE_VALID | PAGE_RESET | PAGE_READ | 1298 (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE)); 1299 1300 shm_region_rm_complete(raddr, last); 1301 shm_region_add(raddr, last); 1302 } 1303 1304 /* 1305 * We're mapping shared memory, so ensure we generate code for parallel 1306 * execution and flush old translations. This will work up to the level 1307 * supported by the host -- anything that requires EXCP_ATOMIC will not 1308 * be atomic with respect to an external process. 1309 */ 1310 if (!(cpu->tcg_cflags & CF_PARALLEL)) { 1311 cpu->tcg_cflags |= CF_PARALLEL; 1312 tb_flush(cpu); 1313 } 1314 1315 return raddr; 1316 } 1317 1318 abi_long target_shmdt(abi_ulong shmaddr) 1319 { 1320 abi_long rv; 1321 1322 /* shmdt pointers are always untagged */ 1323 1324 WITH_MMAP_LOCK_GUARD() { 1325 abi_ulong last = shm_region_find(shmaddr); 1326 if (last == 0) { 1327 return -TARGET_EINVAL; 1328 } 1329 1330 rv = get_errno(shmdt(g2h_untagged(shmaddr))); 1331 if (rv == 0) { 1332 abi_ulong size = last - shmaddr + 1; 1333 1334 page_set_flags(shmaddr, last, 0); 1335 shm_region_rm_complete(shmaddr, last); 1336 mmap_reserve_or_unmap(shmaddr, size); 1337 } 1338 } 1339 return rv; 1340 } 1341