1 /* 2 * mmap support for qemu 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include <sys/shm.h> 21 #include "trace.h" 22 #include "exec/log.h" 23 #include "qemu.h" 24 #include "user-internals.h" 25 #include "user-mmap.h" 26 #include "target_mman.h" 27 #include "qemu/interval-tree.h" 28 29 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; 30 static __thread int mmap_lock_count; 31 32 void mmap_lock(void) 33 { 34 if (mmap_lock_count++ == 0) { 35 pthread_mutex_lock(&mmap_mutex); 36 } 37 } 38 39 void mmap_unlock(void) 40 { 41 assert(mmap_lock_count > 0); 42 if (--mmap_lock_count == 0) { 43 pthread_mutex_unlock(&mmap_mutex); 44 } 45 } 46 47 bool have_mmap_lock(void) 48 { 49 return mmap_lock_count > 0 ? true : false; 50 } 51 52 /* Grab lock to make sure things are in a consistent state after fork(). */ 53 void mmap_fork_start(void) 54 { 55 if (mmap_lock_count) 56 abort(); 57 pthread_mutex_lock(&mmap_mutex); 58 } 59 60 void mmap_fork_end(int child) 61 { 62 if (child) { 63 pthread_mutex_init(&mmap_mutex, NULL); 64 } else { 65 pthread_mutex_unlock(&mmap_mutex); 66 } 67 } 68 69 /* Protected by mmap_lock. 
*/
static IntervalTreeRoot shm_regions;

/*
 * Record the guest range [start, last] in shm_regions.
 * The node is heap-allocated here and freed by shm_region_rm_complete().
 */
static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

/*
 * Look up a recorded region that begins exactly at 'start'.
 * Return its last address, or 0 if no such region is recorded.
 */
static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    /* Walk every node overlapping 'start'; only an exact start matches. */
    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

/*
 * Remove and free every recorded region that lies completely inside
 * [start, last].  Regions that only partially overlap are left alone.
 */
static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        /* Fetch the successor first: 'i' may be freed below. */
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    /* Any bit outside the accepted set makes the whole request invalid. */
    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/*
 * Change the protection of the guest range [start, start + len).
 *
 * Returns 0 on success (including len == 0), -TARGET_EINVAL if 'start'
 * is not target-page aligned or 'target_prot' is rejected by
 * validate_prot_to_pageflags(), -TARGET_ENOMEM if the range is not a
 * valid guest range, or the non-zero result of the failing host
 * mprotect() call.
 *
 * NOTE: all the constants are the HOST ones, but addresses are target.
 */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    /* At most three host ranges: first page, last page, and the middle. */
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    /* Widen the guest range outwards to host page boundaries. */
    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    /* Apply the host protections; stop at the first failure. */
    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    /* Guest page flags are only updated once every host call succeeded. */
    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/*
 * map an incomplete host page
 *
 * The host page starts at 'real_start'; the guest pages to populate are
 * [start, last] within it.  Returns true on success and false on
 * failure; the explicit failure paths set errno (EINVAL for a writable
 * shared file mapping, EEXIST when the host placed the page elsewhere).
 */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            /* Mapped, but not where requested: undo and report EEXIST. */
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    /* The host page must satisfy both the old and the new guest pages. */
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

/*
 * task_unmapped_base: mmap_find_vma() only advances mmap_next_start for
 *     results at or above this address.
 * elf_et_dyn_base: not referenced by the code visible in this file.
 * mmap_next_start: search start used by mmap_find_vma() when it is
 *     called with start == 0.
 */
abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 *
 * Searches [start, reserved_va] first and, if that yields -1 and
 * 'start' is above mmap_min_addr, retries in [mmap_min_addr, start - 1].
 * Returns whatever page_find_range_empty() returned last.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
369 */ 370 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) 371 { 372 void *ptr, *prev; 373 abi_ulong addr; 374 int wrapped, repeat; 375 376 align = MAX(align, qemu_host_page_size); 377 378 /* If 'start' == 0, then a default start address is used. */ 379 if (start == 0) { 380 start = mmap_next_start; 381 } else { 382 start &= qemu_host_page_mask; 383 } 384 start = ROUND_UP(start, align); 385 386 size = HOST_PAGE_ALIGN(size); 387 388 if (reserved_va) { 389 return mmap_find_vma_reserved(start, size, align); 390 } 391 392 addr = start; 393 wrapped = repeat = 0; 394 prev = 0; 395 396 for (;; prev = ptr) { 397 /* 398 * Reserve needed memory area to avoid a race. 399 * It should be discarded using: 400 * - mmap() with MAP_FIXED flag 401 * - mremap() with MREMAP_FIXED flag 402 * - shmat() with SHM_REMAP flag 403 */ 404 ptr = mmap(g2h_untagged(addr), size, PROT_NONE, 405 MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 406 407 /* ENOMEM, if host address space has no memory */ 408 if (ptr == MAP_FAILED) { 409 return (abi_ulong)-1; 410 } 411 412 /* 413 * Count the number of sequential returns of the same address. 414 * This is used to modify the search algorithm below. 415 */ 416 repeat = (ptr == prev ? repeat + 1 : 0); 417 418 if (h2g_valid(ptr + size - 1)) { 419 addr = h2g(ptr); 420 421 if ((addr & (align - 1)) == 0) { 422 /* Success. */ 423 if (start == mmap_next_start && addr >= task_unmapped_base) { 424 mmap_next_start = addr + size; 425 } 426 return addr; 427 } 428 429 /* The address is not properly aligned for the target. */ 430 switch (repeat) { 431 case 0: 432 /* 433 * Assume the result that the kernel gave us is the 434 * first with enough free space, so start again at the 435 * next higher target page. 436 */ 437 addr = ROUND_UP(addr, align); 438 break; 439 case 1: 440 /* 441 * Sometimes the kernel decides to perform the allocation 442 * at the top end of memory instead. 
443 */ 444 addr &= -align; 445 break; 446 case 2: 447 /* Start over at low memory. */ 448 addr = 0; 449 break; 450 default: 451 /* Fail. This unaligned block must the last. */ 452 addr = -1; 453 break; 454 } 455 } else { 456 /* 457 * Since the result the kernel gave didn't fit, start 458 * again at low memory. If any repetition, fail. 459 */ 460 addr = (repeat ? -1 : 0); 461 } 462 463 /* Unmap and try again. */ 464 munmap(ptr, size); 465 466 /* ENOMEM if we checked the whole of the target address space. */ 467 if (addr == (abi_ulong)-1) { 468 return (abi_ulong)-1; 469 } else if (addr == 0) { 470 if (wrapped) { 471 return (abi_ulong)-1; 472 } 473 wrapped = 1; 474 /* 475 * Don't actually use 0 when wrapping, instead indicate 476 * that we'd truly like an allocation in low memory. 477 */ 478 addr = (mmap_min_addr > TARGET_PAGE_SIZE 479 ? TARGET_PAGE_ALIGN(mmap_min_addr) 480 : TARGET_PAGE_SIZE); 481 } else if (wrapped && addr >= start) { 482 return (abi_ulong)-1; 483 } 484 } 485 } 486 487 /* NOTE: all the constants are the HOST ones */ 488 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, 489 int flags, int fd, off_t offset) 490 { 491 abi_ulong ret, last, real_start, real_last, retaddr, host_len; 492 abi_ulong passthrough_start = -1, passthrough_last = 0; 493 int page_flags; 494 off_t host_offset; 495 496 mmap_lock(); 497 trace_target_mmap(start, len, target_prot, flags, fd, offset); 498 499 if (!len) { 500 errno = EINVAL; 501 goto fail; 502 } 503 504 page_flags = validate_prot_to_pageflags(target_prot); 505 if (!page_flags) { 506 errno = EINVAL; 507 goto fail; 508 } 509 510 /* Also check for overflows... */ 511 len = TARGET_PAGE_ALIGN(len); 512 if (!len) { 513 errno = ENOMEM; 514 goto fail; 515 } 516 517 if (offset & ~TARGET_PAGE_MASK) { 518 errno = EINVAL; 519 goto fail; 520 } 521 522 /* 523 * If we're mapping shared memory, ensure we generate code for parallel 524 * execution and flush old translations. 
This will work up to the level 525 * supported by the host -- anything that requires EXCP_ATOMIC will not 526 * be atomic with respect to an external process. 527 */ 528 if (flags & MAP_SHARED) { 529 CPUState *cpu = thread_cpu; 530 if (!(cpu->tcg_cflags & CF_PARALLEL)) { 531 cpu->tcg_cflags |= CF_PARALLEL; 532 tb_flush(cpu); 533 } 534 } 535 536 real_start = start & qemu_host_page_mask; 537 host_offset = offset & qemu_host_page_mask; 538 539 /* 540 * If the user is asking for the kernel to find a location, do that 541 * before we truncate the length for mapping files below. 542 */ 543 if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { 544 host_len = len + offset - host_offset; 545 host_len = HOST_PAGE_ALIGN(host_len); 546 start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE); 547 if (start == (abi_ulong)-1) { 548 errno = ENOMEM; 549 goto fail; 550 } 551 } 552 553 /* 554 * When mapping files into a memory area larger than the file, accesses 555 * to pages beyond the file size will cause a SIGBUS. 556 * 557 * For example, if mmaping a file of 100 bytes on a host with 4K pages 558 * emulating a target with 8K pages, the target expects to be able to 559 * access the first 8K. But the host will trap us on any access beyond 560 * 4K. 561 * 562 * When emulating a target with a larger page-size than the hosts, we 563 * may need to truncate file maps at EOF and add extra anonymous pages 564 * up to the targets page boundary. 565 */ 566 if ((qemu_real_host_page_size() < qemu_host_page_size) && 567 !(flags & MAP_ANONYMOUS)) { 568 struct stat sb; 569 570 if (fstat(fd, &sb) == -1) { 571 goto fail; 572 } 573 574 /* Are we trying to create a map beyond EOF?. */ 575 if (offset + len > sb.st_size) { 576 /* 577 * If so, truncate the file map at eof aligned with 578 * the hosts real pagesize. Additional anonymous maps 579 * will be created beyond EOF. 
580 */ 581 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); 582 } 583 } 584 585 if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { 586 uintptr_t host_start; 587 int host_prot; 588 void *p; 589 590 host_len = len + offset - host_offset; 591 host_len = HOST_PAGE_ALIGN(host_len); 592 host_prot = target_to_host_prot(target_prot); 593 594 /* 595 * Note: we prefer to control the mapping address. It is 596 * especially important if qemu_host_page_size > 597 * qemu_real_host_page_size. 598 */ 599 p = mmap(g2h_untagged(start), host_len, host_prot, 600 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0); 601 if (p == MAP_FAILED) { 602 goto fail; 603 } 604 /* update start so that it points to the file position at 'offset' */ 605 host_start = (uintptr_t)p; 606 if (!(flags & MAP_ANONYMOUS)) { 607 p = mmap(g2h_untagged(start), len, host_prot, 608 flags | MAP_FIXED, fd, host_offset); 609 if (p == MAP_FAILED) { 610 munmap(g2h_untagged(start), host_len); 611 goto fail; 612 } 613 host_start += offset - host_offset; 614 } 615 start = h2g(host_start); 616 last = start + len - 1; 617 passthrough_start = start; 618 passthrough_last = last; 619 } else { 620 if (start & ~TARGET_PAGE_MASK) { 621 errno = EINVAL; 622 goto fail; 623 } 624 last = start + len - 1; 625 real_last = HOST_PAGE_ALIGN(last) - 1; 626 627 /* 628 * Test if requested memory area fits target address space 629 * It can fail only on 64-bit host with 32-bit target. 630 * On any other target/host host mmap() handles this error correctly. 631 */ 632 if (last < start || !guest_range_valid_untagged(start, len)) { 633 errno = ENOMEM; 634 goto fail; 635 } 636 637 if (flags & MAP_FIXED_NOREPLACE) { 638 /* Validate that the chosen range is empty. */ 639 if (!page_check_range_empty(start, last)) { 640 errno = EEXIST; 641 goto fail; 642 } 643 644 /* 645 * With reserved_va, the entire address space is mmaped in the 646 * host to ensure it isn't accidentally used for something else. 
647 * We have just checked that the guest address is not mapped 648 * within the guest, but need to replace the host reservation. 649 * 650 * Without reserved_va, despite the guest address check above, 651 * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite 652 * any host address mappings. 653 */ 654 if (reserved_va) { 655 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; 656 } 657 } 658 659 /* 660 * worst case: we cannot map the file because the offset is not 661 * aligned, so we read it 662 */ 663 if (!(flags & MAP_ANONYMOUS) && 664 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) { 665 /* 666 * msync() won't work here, so we return an error if write is 667 * possible while it is a shared mapping 668 */ 669 if ((flags & MAP_TYPE) == MAP_SHARED 670 && (target_prot & PROT_WRITE)) { 671 errno = EINVAL; 672 goto fail; 673 } 674 retaddr = target_mmap(start, len, target_prot | PROT_WRITE, 675 (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) 676 | MAP_PRIVATE | MAP_ANONYMOUS, 677 -1, 0); 678 if (retaddr == -1) { 679 goto fail; 680 } 681 if (pread(fd, g2h_untagged(start), len, offset) == -1) { 682 goto fail; 683 } 684 if (!(target_prot & PROT_WRITE)) { 685 ret = target_mprotect(start, len, target_prot); 686 assert(ret == 0); 687 } 688 goto the_end; 689 } 690 691 /* handle the start of the mapping */ 692 if (start > real_start) { 693 if (real_last == real_start + qemu_host_page_size - 1) { 694 /* one single host page */ 695 if (!mmap_frag(real_start, start, last, 696 target_prot, flags, fd, offset)) { 697 goto fail; 698 } 699 goto the_end1; 700 } 701 if (!mmap_frag(real_start, start, 702 real_start + qemu_host_page_size - 1, 703 target_prot, flags, fd, offset)) { 704 goto fail; 705 } 706 real_start += qemu_host_page_size; 707 } 708 /* handle the end of the mapping */ 709 if (last < real_last) { 710 abi_ulong real_page = real_last - qemu_host_page_size + 1; 711 if (!mmap_frag(real_page, real_page, last, 712 target_prot, flags, fd, 713 offset + 
real_page - start)) { 714 goto fail; 715 } 716 real_last -= qemu_host_page_size; 717 } 718 719 /* map the middle (easier) */ 720 if (real_start < real_last) { 721 void *p, *want_p; 722 off_t offset1; 723 size_t len1; 724 725 if (flags & MAP_ANONYMOUS) { 726 offset1 = 0; 727 } else { 728 offset1 = offset + real_start - start; 729 } 730 len1 = real_last - real_start + 1; 731 want_p = g2h_untagged(real_start); 732 733 p = mmap(want_p, len1, target_to_host_prot(target_prot), 734 flags, fd, offset1); 735 if (p != want_p) { 736 if (p != MAP_FAILED) { 737 munmap(p, len1); 738 errno = EEXIST; 739 } 740 goto fail; 741 } 742 passthrough_start = real_start; 743 passthrough_last = real_last; 744 } 745 } 746 the_end1: 747 if (flags & MAP_ANONYMOUS) { 748 page_flags |= PAGE_ANON; 749 } 750 page_flags |= PAGE_RESET; 751 if (passthrough_start > passthrough_last) { 752 page_set_flags(start, last, page_flags); 753 } else { 754 if (start < passthrough_start) { 755 page_set_flags(start, passthrough_start - 1, page_flags); 756 } 757 page_set_flags(passthrough_start, passthrough_last, 758 page_flags | PAGE_PASSTHROUGH); 759 if (passthrough_last < last) { 760 page_set_flags(passthrough_last + 1, last, page_flags); 761 } 762 } 763 shm_region_rm_complete(start, last); 764 the_end: 765 trace_target_mmap_complete(start); 766 if (qemu_loglevel_mask(CPU_LOG_PAGE)) { 767 FILE *f = qemu_log_trylock(); 768 if (f) { 769 fprintf(f, "page layout changed following mmap\n"); 770 page_dump(f); 771 qemu_log_unlock(f); 772 } 773 } 774 mmap_unlock(); 775 return start; 776 fail: 777 mmap_unlock(); 778 return -1; 779 } 780 781 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) 782 { 783 abi_ulong real_start; 784 abi_ulong real_last; 785 abi_ulong real_len; 786 abi_ulong last; 787 abi_ulong a; 788 void *host_start; 789 int prot; 790 791 last = start + len - 1; 792 real_start = start & qemu_host_page_mask; 793 real_last = HOST_PAGE_ALIGN(last) - 1; 794 795 /* 796 * If guest pages remain on the 
first or last host pages, 797 * adjust the deallocation to retain those guest pages. 798 * The single page special case is required for the last page, 799 * lest real_start overflow to zero. 800 */ 801 if (real_last - real_start < qemu_host_page_size) { 802 prot = 0; 803 for (a = real_start; a < start; a += TARGET_PAGE_SIZE) { 804 prot |= page_get_flags(a); 805 } 806 for (a = last; a < real_last; a += TARGET_PAGE_SIZE) { 807 prot |= page_get_flags(a + 1); 808 } 809 if (prot != 0) { 810 return 0; 811 } 812 } else { 813 for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) { 814 prot |= page_get_flags(a); 815 } 816 if (prot != 0) { 817 real_start += qemu_host_page_size; 818 } 819 820 for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) { 821 prot |= page_get_flags(a + 1); 822 } 823 if (prot != 0) { 824 real_last -= qemu_host_page_size; 825 } 826 827 if (real_last < real_start) { 828 return 0; 829 } 830 } 831 832 real_len = real_last - real_start + 1; 833 host_start = g2h_untagged(real_start); 834 835 if (reserved_va) { 836 void *ptr = mmap(host_start, real_len, PROT_NONE, 837 MAP_FIXED | MAP_ANONYMOUS 838 | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 839 return ptr == host_start ? 
0 : -1; 840 } 841 return munmap(host_start, real_len); 842 } 843 844 int target_munmap(abi_ulong start, abi_ulong len) 845 { 846 int ret; 847 848 trace_target_munmap(start, len); 849 850 if (start & ~TARGET_PAGE_MASK) { 851 errno = EINVAL; 852 return -1; 853 } 854 len = TARGET_PAGE_ALIGN(len); 855 if (len == 0 || !guest_range_valid_untagged(start, len)) { 856 errno = EINVAL; 857 return -1; 858 } 859 860 mmap_lock(); 861 ret = mmap_reserve_or_unmap(start, len); 862 if (likely(ret == 0)) { 863 page_set_flags(start, start + len - 1, 0); 864 shm_region_rm_complete(start, start + len - 1); 865 } 866 mmap_unlock(); 867 868 return ret; 869 } 870 871 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, 872 abi_ulong new_size, unsigned long flags, 873 abi_ulong new_addr) 874 { 875 int prot; 876 void *host_addr; 877 878 if (!guest_range_valid_untagged(old_addr, old_size) || 879 ((flags & MREMAP_FIXED) && 880 !guest_range_valid_untagged(new_addr, new_size)) || 881 ((flags & MREMAP_MAYMOVE) == 0 && 882 !guest_range_valid_untagged(old_addr, new_size))) { 883 errno = ENOMEM; 884 return -1; 885 } 886 887 mmap_lock(); 888 889 if (flags & MREMAP_FIXED) { 890 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size, 891 flags, g2h_untagged(new_addr)); 892 893 if (reserved_va && host_addr != MAP_FAILED) { 894 /* 895 * If new and old addresses overlap then the above mremap will 896 * already have failed with EINVAL. 
897 */ 898 mmap_reserve_or_unmap(old_addr, old_size); 899 } 900 } else if (flags & MREMAP_MAYMOVE) { 901 abi_ulong mmap_start; 902 903 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE); 904 905 if (mmap_start == -1) { 906 errno = ENOMEM; 907 host_addr = MAP_FAILED; 908 } else { 909 host_addr = mremap(g2h_untagged(old_addr), old_size, new_size, 910 flags | MREMAP_FIXED, 911 g2h_untagged(mmap_start)); 912 if (reserved_va) { 913 mmap_reserve_or_unmap(old_addr, old_size); 914 } 915 } 916 } else { 917 int page_flags = 0; 918 if (reserved_va && old_size < new_size) { 919 abi_ulong addr; 920 for (addr = old_addr + old_size; 921 addr < old_addr + new_size; 922 addr++) { 923 page_flags |= page_get_flags(addr); 924 } 925 } 926 if (page_flags == 0) { 927 host_addr = mremap(g2h_untagged(old_addr), 928 old_size, new_size, flags); 929 930 if (host_addr != MAP_FAILED) { 931 /* Check if address fits target address space */ 932 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) { 933 /* Revert mremap() changes */ 934 host_addr = mremap(g2h_untagged(old_addr), 935 new_size, old_size, flags); 936 errno = ENOMEM; 937 host_addr = MAP_FAILED; 938 } else if (reserved_va && old_size > new_size) { 939 mmap_reserve_or_unmap(old_addr + old_size, 940 old_size - new_size); 941 } 942 } 943 } else { 944 errno = ENOMEM; 945 host_addr = MAP_FAILED; 946 } 947 } 948 949 if (host_addr == MAP_FAILED) { 950 new_addr = -1; 951 } else { 952 new_addr = h2g(host_addr); 953 prot = page_get_flags(old_addr); 954 page_set_flags(old_addr, old_addr + old_size - 1, 0); 955 shm_region_rm_complete(old_addr, old_addr + old_size - 1); 956 page_set_flags(new_addr, new_addr + new_size - 1, 957 prot | PAGE_VALID | PAGE_RESET); 958 shm_region_rm_complete(new_addr, new_addr + new_size - 1); 959 } 960 mmap_unlock(); 961 return new_addr; 962 } 963 964 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice) 965 { 966 abi_ulong len; 967 int ret = 0; 968 969 if (start & ~TARGET_PAGE_MASK) { 970 
return -TARGET_EINVAL; 971 } 972 if (len_in == 0) { 973 return 0; 974 } 975 len = TARGET_PAGE_ALIGN(len_in); 976 if (len == 0 || !guest_range_valid_untagged(start, len)) { 977 return -TARGET_EINVAL; 978 } 979 980 /* Translate for some architectures which have different MADV_xxx values */ 981 switch (advice) { 982 case TARGET_MADV_DONTNEED: /* alpha */ 983 advice = MADV_DONTNEED; 984 break; 985 case TARGET_MADV_WIPEONFORK: /* parisc */ 986 advice = MADV_WIPEONFORK; 987 break; 988 case TARGET_MADV_KEEPONFORK: /* parisc */ 989 advice = MADV_KEEPONFORK; 990 break; 991 /* we do not care about the other MADV_xxx values yet */ 992 } 993 994 /* 995 * Most advice values are hints, so ignoring and returning success is ok. 996 * 997 * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and 998 * MADV_KEEPONFORK are not hints and need to be emulated. 999 * 1000 * A straight passthrough for those may not be safe because qemu sometimes 1001 * turns private file-backed mappings into anonymous mappings. 1002 * If all guest pages have PAGE_PASSTHROUGH set, mappings have the 1003 * same semantics for the host as for the guest. 1004 * 1005 * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and 1006 * return failure if not. 1007 * 1008 * MADV_DONTNEED is passed through as well, if possible. 1009 * If passthrough isn't possible, we nevertheless (wrongly!) return 1010 * success, which is broken but some userspace programs fail to work 1011 * otherwise. Completely implementing such emulation is quite complicated 1012 * though. 
1013 */ 1014 mmap_lock(); 1015 switch (advice) { 1016 case MADV_WIPEONFORK: 1017 case MADV_KEEPONFORK: 1018 ret = -EINVAL; 1019 /* fall through */ 1020 case MADV_DONTNEED: 1021 if (page_check_range(start, len, PAGE_PASSTHROUGH)) { 1022 ret = get_errno(madvise(g2h_untagged(start), len, advice)); 1023 if ((advice == MADV_DONTNEED) && (ret == 0)) { 1024 page_reset_target_data(start, start + len - 1); 1025 } 1026 } 1027 } 1028 mmap_unlock(); 1029 1030 return ret; 1031 } 1032 1033 #ifndef TARGET_FORCE_SHMLBA 1034 /* 1035 * For most architectures, SHMLBA is the same as the page size; 1036 * some architectures have larger values, in which case they should 1037 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function. 1038 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA 1039 * and defining its own value for SHMLBA. 1040 * 1041 * The kernel also permits SHMLBA to be set by the architecture to a 1042 * value larger than the page size without setting __ARCH_FORCE_SHMLBA; 1043 * this means that addresses are rounded to the large size if 1044 * SHM_RND is set but addresses not aligned to that size are not rejected 1045 * as long as they are at least page-aligned. Since the only architecture 1046 * which uses this is ia64 this code doesn't provide for that oddity. 
1047 */ 1048 static inline abi_ulong target_shmlba(CPUArchState *cpu_env) 1049 { 1050 return TARGET_PAGE_SIZE; 1051 } 1052 #endif 1053 1054 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid, 1055 abi_ulong shmaddr, int shmflg) 1056 { 1057 CPUState *cpu = env_cpu(cpu_env); 1058 abi_ulong raddr; 1059 struct shmid_ds shm_info; 1060 int ret; 1061 abi_ulong shmlba; 1062 1063 /* shmat pointers are always untagged */ 1064 1065 /* find out the length of the shared memory segment */ 1066 ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); 1067 if (is_error(ret)) { 1068 /* can't get length, bail out */ 1069 return ret; 1070 } 1071 1072 shmlba = target_shmlba(cpu_env); 1073 1074 if (shmaddr & (shmlba - 1)) { 1075 if (shmflg & SHM_RND) { 1076 shmaddr &= ~(shmlba - 1); 1077 } else { 1078 return -TARGET_EINVAL; 1079 } 1080 } 1081 if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) { 1082 return -TARGET_EINVAL; 1083 } 1084 1085 WITH_MMAP_LOCK_GUARD() { 1086 void *host_raddr; 1087 abi_ulong last; 1088 1089 if (shmaddr) { 1090 host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); 1091 } else { 1092 abi_ulong mmap_start; 1093 1094 /* In order to use the host shmat, we need to honor host SHMLBA. */ 1095 mmap_start = mmap_find_vma(0, shm_info.shm_segsz, 1096 MAX(SHMLBA, shmlba)); 1097 1098 if (mmap_start == -1) { 1099 return -TARGET_ENOMEM; 1100 } 1101 host_raddr = shmat(shmid, g2h_untagged(mmap_start), 1102 shmflg | SHM_REMAP); 1103 } 1104 1105 if (host_raddr == (void *)-1) { 1106 return get_errno(-1); 1107 } 1108 raddr = h2g(host_raddr); 1109 last = raddr + shm_info.shm_segsz - 1; 1110 1111 page_set_flags(raddr, last, 1112 PAGE_VALID | PAGE_RESET | PAGE_READ | 1113 (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE)); 1114 1115 shm_region_rm_complete(raddr, last); 1116 shm_region_add(raddr, last); 1117 } 1118 1119 /* 1120 * We're mapping shared memory, so ensure we generate code for parallel 1121 * execution and flush old translations. 
This will work up to the level 1122 * supported by the host -- anything that requires EXCP_ATOMIC will not 1123 * be atomic with respect to an external process. 1124 */ 1125 if (!(cpu->tcg_cflags & CF_PARALLEL)) { 1126 cpu->tcg_cflags |= CF_PARALLEL; 1127 tb_flush(cpu); 1128 } 1129 1130 return raddr; 1131 } 1132 1133 abi_long target_shmdt(abi_ulong shmaddr) 1134 { 1135 abi_long rv; 1136 1137 /* shmdt pointers are always untagged */ 1138 1139 WITH_MMAP_LOCK_GUARD() { 1140 abi_ulong last = shm_region_find(shmaddr); 1141 if (last == 0) { 1142 return -TARGET_EINVAL; 1143 } 1144 1145 rv = get_errno(shmdt(g2h_untagged(shmaddr))); 1146 if (rv == 0) { 1147 abi_ulong size = last - shmaddr + 1; 1148 1149 page_set_flags(shmaddr, last, 0); 1150 shm_region_rm_complete(shmaddr, last); 1151 mmap_reserve_or_unmap(shmaddr, size); 1152 } 1153 } 1154 return rv; 1155 } 1156